diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e8a5db28ea0a4..e7da98de2400d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -401,6 +401,11 @@ static cl::opt EnableEarlyExitVectorization( cl::desc( "Enable vectorization of early exit loops with uncountable exits.")); +static cl::opt TryToKeepUniformBranches( + "vect-keep-uniform-branches", cl::init(false), cl::Hidden, + cl::desc("Enable preservation of uniform branch conditions " + "when vectorizing.")); + // Likelyhood of bypassing the vectorized loop because assumptions about SCEV // variables not overflowing do not hold. See `emitSCEVChecks`. static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127}; @@ -2932,7 +2937,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Fix widened non-induction PHIs by setting up the PHI operands. 
- if (EnableVPlanNativePath) + if (EnableVPlanNativePath || TryToKeepUniformBranches) fixNonInductionPHIs(State); // After vectorization, the exit blocks of the original loop will have @@ -7577,6 +7582,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { BestPlan.getVectorLoopRegion()->getSingleSuccessor() != BestPlan.getMiddleBlock(); assert((BestFactor.Width == LegacyVF.Width || PlanForEarlyExitLoop || + TryToKeepUniformBranches || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), CostCtx, OrigLoop) || planContainsAdditionalSimplifications(getPlanFor(LegacyVF.Width), @@ -9295,6 +9301,106 @@ static void addExitUsersForFirstOrderRecurrences( } } +/// Given a VPlan Dominator Tree \p DT that represents the CFG before +/// if-conversion and a block with a conditional branch \p VPBB, +/// find the basic block where the two distinct (but possibly empty) +/// single-exit single-entry subregions on the two sides of that branch +/// join back together, as well as the blocks exiting the two subregions. +/// The join block has to have only these two predecessors. +/// If no such join block and regions were found, return std::nullopt. +static std::optional> +canKeepBranchDuringIfConversion(const VPDominatorTree &DT, VPBasicBlock *VPBB, + VPlanHCFGBuilder &HCFGBuilder) { + const VPRegionBlock *Region = VPBB->getParent(); + auto FindSubregionExit = + [&](VPBasicBlock *Pred, + VPBlockBase *Entry) -> std::pair { + // The branch preservation is restricted to cases where + // the SESEs are completely empty or have a dedicated entry and exit. + // Because of the way the VPlan is flattened, the entry could already + // have gotten predecessors removed, so check based on the IR. + if (HCFGBuilder.getIRBBForVPB(Entry)->hasNPredecessorsOrMore(2)) + return {Pred, Entry}; + + // Build the biggest possible SESE with the entry Entry. 
+ // As the DT is not updated during flattening, even if other edges + // entering the SESE would have already been removed, the fact + // that there used to be one will be detected. + VPBlockBase *Exiting = nullptr; + SmallSetVector Worklist; + Worklist.insert(Entry); + for (unsigned I = 0; I < Worklist.size(); I++) { + auto *BB = Worklist[I]; + assert(BB->getParent() == Region); + for (auto *Succ : BB->getSuccessors()) { + if (DT.dominates(Entry, Succ)) + Worklist.insert(Succ); + else if (Exiting || BB->getNumSuccessors() != 1) + return {nullptr, nullptr}; + else + Exiting = BB; + } + } + // Exiting stays null if no edge leaves the SESE; report failure then. + return {Exiting, Exiting ? Exiting->getSingleSuccessor() : nullptr}; + }; + + auto [LHSExiting, LHSSucc] = + FindSubregionExit(VPBB, VPBB->getSuccessors()[0]); + auto [RHSExiting, RHSSucc] = + FindSubregionExit(VPBB, VPBB->getSuccessors()[1]); + if (!LHSExiting || !RHSExiting || LHSSucc != RHSSucc || + LHSSucc->getNumPredecessors() != 2) + return std::nullopt; + + return std::tuple(LHSSucc, LHSExiting, RHSExiting); +} + +/// Given a basic block \p BranchBB, \p JoinBB, and a pair of blocks +/// that represent the original successor of \p BranchBB and exits +/// (or the \p BranchBB in case of a direct jump to \p JoinBB) of the +/// single-entry single-exit subregions, introduce the branch +/// back into the control flow. +static void reconnectVPlanCFGForPreservedBranch( + VPBasicBlock *BranchBB, std::pair LeftSESE, + std::pair RightSESE, VPBasicBlock *JoinBB, + VPRecipeBuilder &RecipeBuilder, VPlanHCFGBuilder &HCFGBuilder) { + + // Disconnect the entries/exits of the regions from their RPO + // predecessors/successors, and then re-connect them. 
+ for (auto [Entry, Exiting] : {LeftSESE, RightSESE}) { + if (auto *Pred = Entry->getSinglePredecessor()) + VPBlockUtils::disconnectBlocks(Pred, Entry); + if (auto *Succ = Exiting->getSingleSuccessor()) + VPBlockUtils::disconnectBlocks(Exiting, Succ); + } + for (auto [Entry, Exiting] : {LeftSESE, RightSESE}) + if (Exiting == BranchBB) { + VPBlockUtils::connectBlocks(BranchBB, JoinBB); + } else { + VPBlockUtils::connectBlocks(BranchBB, Entry); + VPBlockUtils::connectBlocks(Exiting, JoinBB); + } + + // The mask of the join block is that of the block with the branch. + RecipeBuilder.setBlockInMask( + HCFGBuilder.getIRBBForVPB(JoinBB), + RecipeBuilder.getBlockInMask(HCFGBuilder.getIRBBForVPB(BranchBB))); + + // Make sure the phi nodes in JoinBB are not replaced by blends. + for (auto &R : JoinBB->phis()) { + auto *Phi = cast(&R); + auto *IRPhi = cast(Phi->getUnderlyingValue()); + RecipeBuilder.setRecipe(IRPhi, Phi); + Phi->setOperand( + 0, RecipeBuilder.getVPValueOrAddLiveIn(IRPhi->getIncomingValueForBlock( + HCFGBuilder.getIRBBForVPB(LeftSESE.second)))); + Phi->setOperand( + 1, RecipeBuilder.getVPValueOrAddLiveIn(IRPhi->getIncomingValueForBlock( + HCFGBuilder.getIRBBForVPB(RightSESE.second)))); + } +} + VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { @@ -9390,6 +9496,18 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { auto *MiddleVPBB = Plan->getMiddleBlock(); + // A map of the block where the sub-regions on the left and right side + // of a preservable uniform branch join back together. + DenseMap, + std::pair>> + PreservableUniformBranches; + + // Purposefully not updated during construction: + VPDominatorTree VPDT; + if (TryToKeepUniformBranches) + VPDT.recalculate(*Plan); + // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. 
ReversePostOrderTraversal> RPOT( @@ -9398,6 +9516,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi(); VPBlockBase *PrevVPBB = nullptr; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(RPOT)) { + // Handle a block where a preservable uniform branch joins back together. + bool KeepPhis = false; + if (auto Iter = PreservableUniformBranches.find(VPBB); + Iter != PreservableUniformBranches.end()) { + auto [BranchBB, LeftSESE, RightSESE] = Iter->second; + reconnectVPlanCFGForPreservedBranch(BranchBB, LeftSESE, RightSESE, VPBB, + RecipeBuilder, HCFGBuilder); + PrevVPBB = nullptr; + KeepPhis = true; + } + // Handle VPBBs down to the latch. if (VPBB == LoopRegion->getExiting()) { assert(!HCFGBuilder.getIRBBForVPB(VPBB) && @@ -9408,15 +9537,16 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // Create mask based on the IR BB corresponding to VPBB. // TODO: Predicate directly based on VPlan. + BasicBlock *IRBB = HCFGBuilder.getIRBBForVPB(VPBB); Builder.setInsertPoint(VPBB, VPBB->begin()); if (VPBB == HeaderVPBB) { Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi()); RecipeBuilder.createHeaderMask(); - } else if (NeedsMasks) { + } else if (NeedsMasks && !RecipeBuilder.hasBlockInMask(IRBB)) { // FIXME: At the moment, masks need to be placed at the beginning of the // block, as blends introduced for phi nodes need to use it. The created // blends should be sunk after the mask recipes. - RecipeBuilder.createBlockInMask(HCFGBuilder.getIRBBForVPB(VPBB)); + RecipeBuilder.createBlockInMask(IRBB); } // Convert input VPInstructions to widened recipes. @@ -9429,7 +9559,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { // FIXME: Migrate code relying on the underlying instruction from VPlan0 // to construct recipes below to not use the underlying instruction. 
if (isa(&R) || - (isa(&R) && !UnderlyingValue)) + (isa(&R) && !UnderlyingValue) || + (isa(&R) && KeepPhis)) continue; // FIXME: VPlan0, which models a copy of the original scalar loop, should @@ -9437,6 +9568,31 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { assert((isa(&R) || isa(&R)) && UnderlyingValue && "unsupported recipe"); + // Check if the branch can be kept, and if so, remember it so that the + // CFG can be reconnected later and set the successor masks. + if (auto *Br = dyn_cast(&R); + Br && Br->getOpcode() == VPInstruction::BranchOnCond && + TryToKeepUniformBranches && + Legal->isInvariant(Br->getUnderlyingInstr()->getOperand(0))) { + if (auto JoinInfo = + canKeepBranchDuringIfConversion(VPDT, VPBB, HCFGBuilder)) { + auto *IRBr = cast(UnderlyingValue); + Br->setOperand( + 0, RecipeBuilder.getVPValueOrAddLiveIn(IRBr->getCondition())); + VPValue *Mask = RecipeBuilder.getBlockInMask(IRBr->getParent()); + RecipeBuilder.setBlockInMask(IRBr->getSuccessor(0), Mask); + RecipeBuilder.setBlockInMask(IRBr->getSuccessor(1), Mask); + auto [JoinBB, LHSExiting, RHSExiting] = JoinInfo.value(); + PreservableUniformBranches[JoinBB] = { + VPBB, + {VPBB->getSuccessors()[0], LHSExiting}, + {VPBB->getSuccessors()[1], RHSExiting}}; + LLVM_DEBUG(dbgs() << "LV: Preserving uniform branch: "; Br->dump(); + dbgs() << ", joins at: " << JoinBB->getName() << "\n"); + break; + } + } + if (isa(&R) && (cast(&R)->getOpcode() == VPInstruction::BranchOnCond || diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index e8d3ad89e14cf..5eb9339726be8 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -198,6 +198,16 @@ class VPRecipeBuilder { /// Returns the *entry* mask for the block \p BB. VPValue *getBlockInMask(BasicBlock *BB) const; + /// Set the *entry* mask for the block \p BB to \p Mask. 
+ void setBlockInMask(BasicBlock *BB, VPValue *Mask) { + BlockMaskCache[BB] = Mask; + } + + /// Return true if there already is an entry block mask for \p BB. + bool hasBlockInMask(const BasicBlock *BB) const { + return BlockMaskCache.contains(BB); + } + /// Create an edge mask for every destination of cases and/or default. void createSwitchEdgeMasks(SwitchInst *SI); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8089cfd1ce802..15e90bc18bc87 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1974,7 +1974,11 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { } VPWidenPHIRecipe *clone() override { - llvm_unreachable("cloning not implemented yet"); + auto *Phi = new VPWidenPHIRecipe( + dyn_cast_if_present(getUnderlyingValue())); + for (unsigned I = 0; I < getNumOperands(); I++) + Phi->addOperand(getIncomingValue(I)); + return Phi; } ~VPWidenPHIRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d57a6c481748c..a7f79d4677203 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3643,9 +3643,6 @@ VPBasicBlock *VPWidenPHIRecipe::getIncomingBlock(unsigned I) { } void VPWidenPHIRecipe::execute(VPTransformState &State) { - assert(EnableVPlanNativePath && - "Non-native vplans are not expected to have VPWidenPHIRecipes."); - State.setDebugLocFrom(getDebugLoc()); Value *Op0 = State.get(getOperand(0)); Type *VecTy = Op0->getType(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6c917e4eef655..bd9fcea424d70 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -396,10 +396,9 @@ static bool mergeBlocksIntoPredecessors(VPlan &Plan) { auto *ParentRegion = cast_or_null(VPBB->getParent()); if 
(ParentRegion && ParentRegion->getExiting() == VPBB) ParentRegion->setExiting(PredVPBB); - for (auto *Succ : to_vector(VPBB->successors())) { - VPBlockUtils::disconnectBlocks(VPBB, Succ); - VPBlockUtils::connectBlocks(PredVPBB, Succ); - } + for (auto *Succ : to_vector(VPBB->successors())) + VPBlockUtils::replacePredecessor(VPBB, PredVPBB, Succ); + // VPBB is now dead and will be cleaned up when the plan gets destroyed. } return !WorkList.empty(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 6ddb88308955f..cbe268b596372 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -80,6 +80,15 @@ class VPBlockUtils { public: VPBlockUtils() = delete; + /// Disconnect \p OldPred from \p Succ and connect \p NewPred to \p Succ + /// instead, but also swapping phi operands in the successor if necessary. + static void replacePredecessor(VPBlockBase *OldPred, VPBlockBase *NewPred, + VPBlockBase *Succ) { + Succ->replacePredecessor(OldPred, NewPred); + OldPred->removeSuccessor(Succ); + NewPred->appendSuccessor(Succ); + } + /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. 
\p BlockPtr's @@ -91,10 +100,8 @@ class VPBlockUtils { "Can't insert new block with predecessors or successors."); NewBlock->setParent(BlockPtr->getParent()); SmallVector Succs(BlockPtr->successors()); - for (VPBlockBase *Succ : Succs) { - disconnectBlocks(BlockPtr, Succ); - connectBlocks(NewBlock, Succ); - } + for (VPBlockBase *Succ : Succs) + replacePredecessor(BlockPtr, NewBlock, Succ); connectBlocks(BlockPtr, NewBlock); } diff --git a/llvm/test/Transforms/LoopVectorize/uniform-branches.ll b/llvm/test/Transforms/LoopVectorize/uniform-branches.ll new file mode 100644 index 0000000000000..10e14ed1b7d49 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/uniform-branches.ll @@ -0,0 +1,1919 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=loop-vectorize,simplifycfg -force-target-instruction-cost=1 \ +; RUN: -force-vector-width=2 -force-vector-interleave=1 \ +; RUN: -vect-keep-uniform-branches < %s | FileCheck -check-prefix=CHECK-IC1 %s +; RUN: opt -S -passes=loop-vectorize,simplifycfg -force-target-instruction-cost=1 \ +; RUN: -force-vector-width=2 -force-vector-interleave=2 \ +; RUN: -vect-keep-uniform-branches < %s | FileCheck -check-prefix=CHECK-IC2 %s + + + +define void @test_keep_uniform_condition( +; CHECK-IC1-LABEL: define void @test_keep_uniform_condition( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_CONDITION:%.*]], ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH3:.*]] ] +; 
CHECK-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC1-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE2:.*]], label %[[IF_FALSE1:.*]] +; CHECK-IC1: [[IF_FALSE1]]: +; CHECK-IC1-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[TMP2]], i32 0 +; CHECK-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 +; CHECK-IC1-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH3]] +; CHECK-IC1: [[IF_TRUE2]]: +; CHECK-IC1-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-IC1-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-IC1-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 +; CHECK-IC1-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1 +; CHECK-IC1-NEXT: [[TMP11:%.*]] = fadd <2 x float> [[TMP10]], [[TMP10]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH3]] +; CHECK-IC1: [[LOOP_LATCH3]]: +; CHECK-IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[TMP11]], %[[IF_TRUE2]] ], [ [[TMP4]], %[[IF_FALSE1]] ] +; CHECK-IC1-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[VEC_PHI]], ptr [[TMP12]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; 
CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE]]: +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_VAL:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: [[ADD:%.*]] = fadd float [[A_VAL]], [[A_VAL]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_FALSE]]: +; CHECK-IC1-NEXT: [[B_VAL:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: [[MUL:%.*]] = fmul float [[B_VAL]], [[B_VAL]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[VAL:%.*]] = phi float [ [[ADD]], %[[IF_TRUE]] ], [ [[MUL]], %[[IF_FALSE]] ] +; CHECK-IC1-NEXT: store float [[VAL]], ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; CHECK-IC2-LABEL: define void @test_keep_uniform_condition( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_CONDITION:%.*]], ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: br label 
%[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH4:.*]] ] +; CHECK-IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-IC2-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE3:.*]], label %[[IF_FALSE1:.*]] +; CHECK-IC2: [[IF_FALSE1]]: +; CHECK-IC2-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0 +; CHECK-IC2-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP4]], i32 2 +; CHECK-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP5]], align 4 +; CHECK-IC2-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP6]], align 4 +; CHECK-IC2-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] +; CHECK-IC2-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH4]] +; CHECK-IC2: [[IF_TRUE3]]: +; CHECK-IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-IC2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP10]], align 4 +; CHECK-IC2-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 +; CHECK-IC2-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1 +; CHECK-IC2-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP11]], align 4 +; CHECK-IC2-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP12]], align 4 +; CHECK-IC2-NEXT: [[TMP19:%.*]] = insertelement 
<2 x float> poison, float [[TMP17]], i32 0 +; CHECK-IC2-NEXT: [[TMP20:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i32 1 +; CHECK-IC2-NEXT: [[TMP21:%.*]] = fadd <2 x float> [[TMP16]], [[TMP16]] +; CHECK-IC2-NEXT: [[TMP22:%.*]] = fadd <2 x float> [[TMP20]], [[TMP20]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH4]] +; CHECK-IC2: [[LOOP_LATCH4]]: +; CHECK-IC2-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[TMP21]], %[[IF_TRUE3]] ], [ [[TMP7]], %[[IF_FALSE1]] ] +; CHECK-IC2-NEXT: [[VEC_PHI5:%.*]] = phi <2 x float> [ [[TMP22]], %[[IF_TRUE3]] ], [ [[TMP8]], %[[IF_FALSE1]] ] +; CHECK-IC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 0 +; CHECK-IC2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[VEC_PHI]], ptr [[TMP23]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[VEC_PHI5]], ptr [[TMP24]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE]]: +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_VAL:%.*]] = load float, ptr 
[[A_ADDR]], align 4 +; CHECK-IC2-NEXT: [[ADD:%.*]] = fadd float [[A_VAL]], [[A_VAL]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_FALSE]]: +; CHECK-IC2-NEXT: [[B_VAL:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: [[MUL:%.*]] = fmul float [[B_VAL]], [[B_VAL]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[VAL:%.*]] = phi float [ [[ADD]], %[[IF_TRUE]] ], [ [[MUL]], %[[IF_FALSE]] ] +; CHECK-IC2-NEXT: store float [[VAL]], ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_condition, ptr %A, ptr noalias %B) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + br i1 %uniform_condition, label %if.true, label %if.false + +if.true: + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.val = load float, ptr %a.addr, align 4 + %add = fadd float %a.val, %a.val + br label %loop.latch + +if.false: + %b.val = load float, ptr %b.addr, align 4 + %mul = fmul float %b.val, %b.val + br label %loop.latch + +loop.latch: + %val = phi float [ %add, %if.true ], [ %mul, %if.false ] + store float %val, ptr %b.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +define void @test_keep_uniform_condition_left_bypass( +; CHECK-IC1-LABEL: define void @test_keep_uniform_condition_left_bypass( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_CONDITION:%.*]], ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], 
label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH3:.*]] ] +; CHECK-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 +; CHECK-IC1-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[LOOP_LATCH3]], label %[[IF_FALSE1:.*]] +; CHECK-IC1: [[IF_FALSE1]]: +; CHECK-IC1-NEXT: [[TMP4:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 +; CHECK-IC1-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 +; CHECK-IC1-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH3]] +; CHECK-IC1: [[LOOP_LATCH3]]: +; CHECK-IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[WIDE_LOAD]], %[[VECTOR_BODY]] ], [ [[TMP5]], %[[IF_FALSE1]] ] +; CHECK-IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[VEC_PHI]], ptr [[TMP6]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
%[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_VAL:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[LOOP_LATCH]], label %[[IF_FALSE:.*]] +; CHECK-IC1: [[IF_FALSE]]: +; CHECK-IC1-NEXT: [[B_VAL:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: [[MUL:%.*]] = fmul float [[B_VAL]], [[B_VAL]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[VAL:%.*]] = phi float [ [[A_VAL]], %[[LOOP_HEADER]] ], [ [[MUL]], %[[IF_FALSE]] ] +; CHECK-IC1-NEXT: store float [[VAL]], ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; CHECK-IC2-LABEL: define void @test_keep_uniform_condition_left_bypass( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_CONDITION:%.*]], ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH5:.*]] ] +; CHECK-IC2-NEXT: [[TMP0:%.*]] = 
add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 +; CHECK-IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 2 +; CHECK-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 +; CHECK-IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 +; CHECK-IC2-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[LOOP_LATCH5]], label %[[IF_FALSE2:.*]] +; CHECK-IC2: [[IF_FALSE2]]: +; CHECK-IC2-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 +; CHECK-IC2-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP1]], i32 2 +; CHECK-IC2-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x float>, ptr [[TMP5]], align 4 +; CHECK-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP6]], align 4 +; CHECK-IC2-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[WIDE_LOAD3]], [[WIDE_LOAD3]] +; CHECK-IC2-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[WIDE_LOAD4]], [[WIDE_LOAD4]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH5]] +; CHECK-IC2: [[LOOP_LATCH5]]: +; CHECK-IC2-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[WIDE_LOAD]], %[[VECTOR_BODY]] ], [ [[TMP7]], %[[IF_FALSE2]] ] +; CHECK-IC2-NEXT: [[VEC_PHI6:%.*]] = phi <2 x float> [ [[WIDE_LOAD1]], %[[VECTOR_BODY]] ], [ [[TMP8]], %[[IF_FALSE2]] ] +; CHECK-IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-IC2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[VEC_PHI]], ptr [[TMP9]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[VEC_PHI6]], ptr [[TMP10]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; 
CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_VAL:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC2-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[LOOP_LATCH]], label %[[IF_FALSE:.*]] +; CHECK-IC2: [[IF_FALSE]]: +; CHECK-IC2-NEXT: [[B_VAL:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: [[MUL:%.*]] = fmul float [[B_VAL]], [[B_VAL]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[VAL:%.*]] = phi float [ [[A_VAL]], %[[LOOP_HEADER]] ], [ [[MUL]], %[[IF_FALSE]] ] +; CHECK-IC2-NEXT: store float [[VAL]], ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_condition, ptr %A, ptr noalias %B) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.val = load float, ptr %a.addr, align 4 + br i1 %uniform_condition, label %loop.latch, label %if.false + +if.false: + %b.val = load float, ptr %b.addr, align 4 + %mul = fmul float %b.val, %b.val + br 
label %loop.latch + +loop.latch: + %val = phi float [ %a.val, %loop.header ], [ %mul, %if.false ] + store float %val, ptr %b.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +define void @test_keep_uniform_condition_right_bypass( +; CHECK-IC1-LABEL: define void @test_keep_uniform_condition_right_bypass( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_CONDITION:%.*]], ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH2:.*]] ] +; CHECK-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC1-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE1:.*]], label %[[LOOP_LATCH2]] +; CHECK-IC1: [[IF_TRUE1]]: +; CHECK-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4 +; CHECK-IC1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4 +; CHECK-IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0 +; CHECK-IC1-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1 +; CHECK-IC1-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP7]], [[TMP7]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH2]] +; CHECK-IC1: [[LOOP_LATCH2]]: +; CHECK-IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[TMP8]], %[[IF_TRUE1]] ], [ 
zeroinitializer, %[[VECTOR_BODY]] ] +; CHECK-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[VEC_PHI]], ptr [[TMP10]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE:.*]], label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_TRUE]]: +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_VAL:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: [[ADD:%.*]] = fadd float [[A_VAL]], [[A_VAL]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[VAL:%.*]] = phi float [ [[ADD]], %[[IF_TRUE]] ], [ 0.000000e+00, %[[LOOP_HEADER]] ] +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: store float [[VAL]], ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; 
CHECK-IC2-LABEL: define void @test_keep_uniform_condition_right_bypass( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_CONDITION:%.*]], ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH2:.*]] ] +; CHECK-IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-IC2-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE1:.*]], label %[[LOOP_LATCH2]] +; CHECK-IC2: [[IF_TRUE1]]: +; CHECK-IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-IC2-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-IC2-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 +; CHECK-IC2-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i32 1 +; CHECK-IC2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-IC2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP7]], align 4 +; CHECK-IC2-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 +; CHECK-IC2-NEXT: [[TMP15:%.*]] = 
insertelement <2 x float> [[TMP14]], float [[TMP13]], i32 1 +; CHECK-IC2-NEXT: [[TMP16:%.*]] = fadd <2 x float> [[TMP11]], [[TMP11]] +; CHECK-IC2-NEXT: [[TMP17:%.*]] = fadd <2 x float> [[TMP15]], [[TMP15]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH2]] +; CHECK-IC2: [[LOOP_LATCH2]]: +; CHECK-IC2-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[TMP16]], %[[IF_TRUE1]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] +; CHECK-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <2 x float> [ [[TMP17]], %[[IF_TRUE1]] ], [ zeroinitializer, %[[VECTOR_BODY]] ] +; CHECK-IC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 0 +; CHECK-IC2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[VEC_PHI]], ptr [[TMP19]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[VEC_PHI3]], ptr [[TMP20]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_CONDITION]], label %[[IF_TRUE:.*]], label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_TRUE]]: +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_VAL:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC2-NEXT: [[ADD:%.*]] = 
fadd float [[A_VAL]], [[A_VAL]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[VAL:%.*]] = phi float [ [[ADD]], %[[IF_TRUE]] ], [ 0.000000e+00, %[[LOOP_HEADER]] ] +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: store float [[VAL]], ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_condition, ptr %A, ptr noalias %B) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + br i1 %uniform_condition, label %if.true, label %loop.latch + +if.true: + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.val = load float, ptr %a.addr, align 4 + %add = fadd float %a.val, %a.val + br label %loop.latch + +loop.latch: + %val = phi float [ %add, %if.true ], [ 0.0, %loop.header ] + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + store float %val, ptr %b.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +define void @test_nested_inner_uniform( +; CHECK-IC1-LABEL: define void @test_nested_inner_uniform( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; 
CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[IF_TRUE_JOIN13:.*]] ] +; CHECK-IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[IF_TRUE_JOIN13]] ] +; CHECK-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC1-NEXT: [[TMP2:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC1-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[TMP2]], zeroinitializer +; CHECK-IC1-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC1: [[PRED_LOAD_IF]]: +; CHECK-IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-IC1-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC1-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] +; CHECK-IC1-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK-IC1: [[PRED_LOAD_IF1]]: +; CHECK-IC1-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +; CHECK-IC1-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP12]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE2]]: +; CHECK-IC1-NEXT: [[TMP14:%.*]] = phi <2 x float> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], %[[PRED_LOAD_IF1]] ] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE_TRUE8:.*]], label %[[IF_TRUE_FALSE3:.*]] +; 
CHECK-IC1: [[IF_TRUE_FALSE3]]: +; CHECK-IC1-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]] +; CHECK-IC1: [[PRED_LOAD_IF4]]: +; CHECK-IC1-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 +; CHECK-IC1-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP17]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE5]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE5]]: +; CHECK-IC1-NEXT: [[TMP19:%.*]] = phi <2 x float> [ poison, %[[IF_TRUE_FALSE3]] ], [ [[TMP18]], %[[PRED_LOAD_IF4]] ] +; CHECK-IC1-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF6:.*]], label %[[IF_TRUE_JOIN13]] +; CHECK-IC1: [[PRED_LOAD_IF6]]: +; CHECK-IC1-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4 +; CHECK-IC1-NEXT: [[TMP23:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP22]], i32 1 +; CHECK-IC1-NEXT: br label %[[IF_TRUE_JOIN13]] +; CHECK-IC1: [[IF_TRUE_TRUE8]]: +; CHECK-IC1-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP24]], label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10:.*]] +; CHECK-IC1: [[PRED_LOAD_IF9]]: +; CHECK-IC1-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4 +; CHECK-IC1-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE10]]: +; CHECK-IC1-NEXT: [[TMP28:%.*]] = phi <2 x float> [ poison, %[[IF_TRUE_TRUE8]] ], [ [[TMP27]], %[[PRED_LOAD_IF9]] ] +; CHECK-IC1-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-IC1-NEXT: br 
i1 [[TMP29]], label %[[PRED_LOAD_IF11:.*]], label %[[IF_TRUE_JOIN13]] +; CHECK-IC1: [[PRED_LOAD_IF11]]: +; CHECK-IC1-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4 +; CHECK-IC1-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP31]], i32 1 +; CHECK-IC1-NEXT: br label %[[IF_TRUE_JOIN13]] +; CHECK-IC1: [[IF_TRUE_JOIN13]]: +; CHECK-IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[TMP19]], %[[PRED_LOAD_CONTINUE5]] ], [ [[TMP23]], %[[PRED_LOAD_IF6]] ], [ [[TMP28]], %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP32]], %[[PRED_LOAD_IF11]] ] +; CHECK-IC1-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x float> [[TMP14]], <2 x float> [[VEC_PHI]] +; CHECK-IC1-NEXT: [[TMP33:%.*]] = fmul <2 x float> [[PREDPHI]], [[PREDPHI]] +; CHECK-IC1-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[TMP33]], ptr [[TMP35]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC1-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC1-NEXT: 
[[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC1-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE]]: +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE_TRUE]]: +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_TRUE_FALSE]]: +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_FALSE]]: +; CHECK-IC1-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC1-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[LOAD:%.*]] = phi float [ [[C_LOAD]], %[[IF_FALSE]] ], [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ] +; CHECK-IC1-NEXT: [[MUL:%.*]] = fmul float [[LOAD]], [[LOAD]] +; CHECK-IC1-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC1-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; CHECK-IC2-LABEL: define void @test_nested_inner_uniform( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[IF_TRUE_JOIN25:.*]] ] +; CHECK-IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[IF_TRUE_JOIN25]] ] +; CHECK-IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-IC2-NEXT: [[TMP4:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP5:%.*]] = urem <2 x i64> [[STEP_ADD]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[TMP4]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP7:%.*]] = icmp eq <2 x i64> [[TMP5]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP9:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC2: [[PRED_LOAD_IF]]: +; CHECK-IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +; CHECK-IC2-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC2-NEXT: [[TMP14:%.*]] = phi <2 x float> [ poison, %[[VECTOR_BODY]] ], [ [[TMP13]], %[[PRED_LOAD_IF]] ] +; CHECK-IC2-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 +; CHECK-IC2-NEXT: br i1 
[[TMP15]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK-IC2: [[PRED_LOAD_IF1]]: +; CHECK-IC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 +; CHECK-IC2-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP17]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE2]]: +; CHECK-IC2-NEXT: [[TMP19:%.*]] = phi <2 x float> [ [[TMP14]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP18]], %[[PRED_LOAD_IF1]] ] +; CHECK-IC2-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC2: [[PRED_LOAD_IF3]]: +; CHECK-IC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4 +; CHECK-IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP22]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC2-NEXT: [[TMP24:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP23]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC2-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP25]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC2: [[PRED_LOAD_IF5]]: +; CHECK-IC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4 +; CHECK-IC2-NEXT: [[TMP28:%.*]] = insertelement <2 x float> [[TMP24]], float [[TMP27]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC2-NEXT: [[TMP29:%.*]] = phi <2 x float> [ [[TMP24]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND]], label 
%[[IF_TRUE_TRUE16:.*]], label %[[IF_TRUE_FALSE7:.*]] +; CHECK-IC2: [[IF_TRUE_FALSE7]]: +; CHECK-IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]] +; CHECK-IC2: [[PRED_LOAD_IF8]]: +; CHECK-IC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP31]], align 4 +; CHECK-IC2-NEXT: [[TMP33:%.*]] = insertelement <2 x float> poison, float [[TMP32]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE9]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE9]]: +; CHECK-IC2-NEXT: [[TMP34:%.*]] = phi <2 x float> [ poison, %[[IF_TRUE_FALSE7]] ], [ [[TMP33]], %[[PRED_LOAD_IF8]] ] +; CHECK-IC2-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] +; CHECK-IC2: [[PRED_LOAD_IF10]]: +; CHECK-IC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP36]], align 4 +; CHECK-IC2-NEXT: [[TMP38:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP37]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE11]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE11]]: +; CHECK-IC2-NEXT: [[TMP39:%.*]] = phi <2 x float> [ [[TMP34]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP38]], %[[PRED_LOAD_IF10]] ] +; CHECK-IC2-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] +; CHECK-IC2: [[PRED_LOAD_IF12]]: +; CHECK-IC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP41]], align 4 +; CHECK-IC2-NEXT: [[TMP43:%.*]] = insertelement <2 x float> poison, float [[TMP42]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE13]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE13]]: +; 
CHECK-IC2-NEXT: [[TMP44:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP43]], %[[PRED_LOAD_IF12]] ] +; CHECK-IC2-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP45]], label %[[PRED_LOAD_IF14:.*]], label %[[IF_TRUE_JOIN25]] +; CHECK-IC2: [[PRED_LOAD_IF14]]: +; CHECK-IC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP47:%.*]] = load float, ptr [[TMP46]], align 4 +; CHECK-IC2-NEXT: [[TMP48:%.*]] = insertelement <2 x float> [[TMP44]], float [[TMP47]], i32 1 +; CHECK-IC2-NEXT: br label %[[IF_TRUE_JOIN25]] +; CHECK-IC2: [[IF_TRUE_TRUE16]]: +; CHECK-IC2-NEXT: [[TMP49:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP49]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] +; CHECK-IC2: [[PRED_LOAD_IF17]]: +; CHECK-IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP51:%.*]] = load float, ptr [[TMP50]], align 4 +; CHECK-IC2-NEXT: [[TMP52:%.*]] = insertelement <2 x float> poison, float [[TMP51]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE18]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE18]]: +; CHECK-IC2-NEXT: [[TMP53:%.*]] = phi <2 x float> [ poison, %[[IF_TRUE_TRUE16]] ], [ [[TMP52]], %[[PRED_LOAD_IF17]] ] +; CHECK-IC2-NEXT: [[TMP54:%.*]] = extractelement <2 x i1> [[TMP6]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP54]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] +; CHECK-IC2: [[PRED_LOAD_IF19]]: +; CHECK-IC2-NEXT: [[TMP55:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP56:%.*]] = load float, ptr [[TMP55]], align 4 +; CHECK-IC2-NEXT: [[TMP57:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP56]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE20]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE20]]: +; CHECK-IC2-NEXT: [[TMP58:%.*]] = phi <2 x float> [ [[TMP53]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP57]], 
%[[PRED_LOAD_IF19]] ] +; CHECK-IC2-NEXT: [[TMP59:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP59]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]] +; CHECK-IC2: [[PRED_LOAD_IF21]]: +; CHECK-IC2-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP61:%.*]] = load float, ptr [[TMP60]], align 4 +; CHECK-IC2-NEXT: [[TMP62:%.*]] = insertelement <2 x float> poison, float [[TMP61]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE22]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE22]]: +; CHECK-IC2-NEXT: [[TMP63:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP62]], %[[PRED_LOAD_IF21]] ] +; CHECK-IC2-NEXT: [[TMP64:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF23:.*]], label %[[IF_TRUE_JOIN25]] +; CHECK-IC2: [[PRED_LOAD_IF23]]: +; CHECK-IC2-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP66:%.*]] = load float, ptr [[TMP65]], align 4 +; CHECK-IC2-NEXT: [[TMP67:%.*]] = insertelement <2 x float> [[TMP63]], float [[TMP66]], i32 1 +; CHECK-IC2-NEXT: br label %[[IF_TRUE_JOIN25]] +; CHECK-IC2: [[IF_TRUE_JOIN25]]: +; CHECK-IC2-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[TMP39]], %[[PRED_LOAD_IF14]] ], [ [[TMP39]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP58]], %[[PRED_LOAD_IF23]] ], [ [[TMP58]], %[[PRED_LOAD_CONTINUE22]] ] +; CHECK-IC2-NEXT: [[VEC_PHI26:%.*]] = phi <2 x float> [ [[TMP44]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP48]], %[[PRED_LOAD_IF14]] ], [ [[TMP63]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP67]], %[[PRED_LOAD_IF23]] ] +; CHECK-IC2-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x float> [[TMP19]], <2 x float> [[VEC_PHI]] +; CHECK-IC2-NEXT: [[PREDPHI27:%.*]] = select <2 x i1> [[TMP9]], <2 x float> [[TMP29]], <2 x float> [[VEC_PHI26]] +; CHECK-IC2-NEXT: [[TMP68:%.*]] = fmul <2 x float> [[PREDPHI]], [[PREDPHI]] +; CHECK-IC2-NEXT: [[TMP69:%.*]] = fmul 
<2 x float> [[PREDPHI27]], [[PREDPHI27]] +; CHECK-IC2-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 0 +; CHECK-IC2-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[TMP68]], ptr [[TMP71]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[TMP69]], ptr [[TMP72]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP73:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP73]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC2-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC2-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE]]: +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE_TRUE]]: +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_TRUE_FALSE]]: +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: [[B_LOAD:%.*]] 
= load float, ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_FALSE]]: +; CHECK-IC2-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC2-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[LOAD:%.*]] = phi float [ [[C_LOAD]], %[[IF_FALSE]] ], [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ] +; CHECK-IC2-NEXT: [[MUL:%.*]] = fmul float [[LOAD]], [[LOAD]] +; CHECK-IC2-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC2-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_cond, ptr %A, ptr %B, ptr %C, ptr noalias %D) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + %mod = urem i64 %i, 3 + %divergant_cond = icmp eq i64 %mod, 0 + br i1 %divergant_cond, label %if.true, label %if.false + +if.true: + br i1 %uniform_cond, label %if.true.true, label %if.true.false + +if.true.true: + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.load = load float, ptr %a.addr, align 4 + br label %if.true.join + +if.true.false: + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + %b.load = load float, ptr %b.addr, align 4 + br label %if.true.join + +if.true.join: + %a.or.b.load = phi float [ %a.load, %if.true.true ], [ %b.load, %if.true.false ] + br label %loop.latch + +if.false: + %c.addr = getelementptr inbounds float, ptr %C, i64 %i + %c.load = load float, ptr %c.addr, align 4 + br label %loop.latch + +loop.latch: + %load = phi float [ %a.or.b.load, %if.true.join ], [ %c.load, %if.false ] + %mul = 
fmul float %load, %load + %d.addr = getelementptr inbounds float, ptr %D, i64 %i + store float %mul, ptr %d.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +define void @test_nested_outer_uniform( +; CHECK-IC1-LABEL: define void @test_nested_outer_uniform( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH9:.*]] ] +; CHECK-IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[LOOP_LATCH9]] ] +; CHECK-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE2:.*]], label %[[IF_FALSE1:.*]] +; CHECK-IC1: [[IF_FALSE1]]: +; CHECK-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4 +; CHECK-IC1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4 +; CHECK-IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0 +; CHECK-IC1-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH9]] +; CHECK-IC1: [[IF_TRUE2]]: +; CHECK-IC1-NEXT: [[TMP8:%.*]] = urem <2 x i64> [[VEC_IND]], splat 
(i64 3) +; CHECK-IC1-NEXT: [[TMP9:%.*]] = icmp eq <2 x i64> [[TMP8]], zeroinitializer +; CHECK-IC1-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-IC1-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC1: [[PRED_LOAD_IF]]: +; CHECK-IC1-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 +; CHECK-IC1-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC1-NEXT: [[TMP15:%.*]] = phi <2 x float> [ poison, %[[IF_TRUE2]] ], [ [[TMP14]], %[[PRED_LOAD_IF]] ] +; CHECK-IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC1: [[PRED_LOAD_IF3]]: +; CHECK-IC1-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP17]], align 4 +; CHECK-IC1-NEXT: [[TMP19:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP18]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC1-NEXT: [[TMP20:%.*]] = phi <2 x float> [ [[TMP15]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC1-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP21]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC1: [[PRED_LOAD_IF5]]: +; CHECK-IC1-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4 +; CHECK-IC1-NEXT: [[TMP24:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i32 0 +; CHECK-IC1-NEXT: br label 
%[[PRED_LOAD_CONTINUE6]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC1-NEXT: [[TMP25:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC1-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] +; CHECK-IC1: [[PRED_LOAD_IF7]]: +; CHECK-IC1-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP28:%.*]] = load float, ptr [[TMP27]], align 4 +; CHECK-IC1-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP28]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE8]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE8]]: +; CHECK-IC1-NEXT: [[TMP30:%.*]] = phi <2 x float> [ [[TMP25]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP29]], %[[PRED_LOAD_IF7]] ] +; CHECK-IC1-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x float> [[TMP20]], <2 x float> [[TMP30]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH9]] +; CHECK-IC1: [[LOOP_LATCH9]]: +; CHECK-IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[PREDPHI]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP7]], %[[IF_FALSE1]] ] +; CHECK-IC1-NEXT: [[TMP31:%.*]] = fmul <2 x float> [[VEC_PHI]], [[VEC_PHI]] +; CHECK-IC1-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP0]] +; CHECK-IC1-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[TMP31]], ptr [[TMP33]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC1-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label 
%[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE]]: +; CHECK-IC1-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC1-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC1-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE_TRUE]]: +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_TRUE_FALSE]]: +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_FALSE]]: +; CHECK-IC1-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC1-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[LOAD:%.*]] = phi float [ [[C_LOAD]], %[[IF_FALSE]] ], [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ] +; CHECK-IC1-NEXT: [[MUL:%.*]] = fmul float [[LOAD]], [[LOAD]] +; CHECK-IC1-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC1-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; 
CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; CHECK-IC2-LABEL: define void @test_nested_outer_uniform( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH18:.*]] ] +; CHECK-IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[LOOP_LATCH18]] ] +; CHECK-IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE2:.*]], label %[[IF_FALSE1:.*]] +; CHECK-IC2: [[IF_FALSE1]]: +; CHECK-IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-IC2-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-IC2-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i32 0 +; CHECK-IC2-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i32 1 +; 
CHECK-IC2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-IC2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP7]], align 4 +; CHECK-IC2-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0 +; CHECK-IC2-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i32 1 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH18]] +; CHECK-IC2: [[IF_TRUE2]]: +; CHECK-IC2-NEXT: [[TMP16:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP17:%.*]] = urem <2 x i64> [[STEP_ADD]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP18:%.*]] = icmp eq <2 x i64> [[TMP16]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP19:%.*]] = icmp eq <2 x i64> [[TMP17]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP20:%.*]] = xor <2 x i1> [[TMP18]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP21:%.*]] = xor <2 x i1> [[TMP19]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP20]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC2: [[PRED_LOAD_IF]]: +; CHECK-IC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4 +; CHECK-IC2-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP24]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC2-NEXT: [[TMP26:%.*]] = phi <2 x float> [ poison, %[[IF_TRUE2]] ], [ [[TMP25]], %[[PRED_LOAD_IF]] ] +; CHECK-IC2-NEXT: [[TMP27:%.*]] = extractelement <2 x i1> [[TMP20]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP27]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC2: [[PRED_LOAD_IF3]]: +; CHECK-IC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP28]], align 4 +; CHECK-IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP26]], float [[TMP29]], i32 1 +; CHECK-IC2-NEXT: 
br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC2-NEXT: [[TMP31:%.*]] = phi <2 x float> [ [[TMP26]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP30]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC2-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC2: [[PRED_LOAD_IF5]]: +; CHECK-IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP33]], align 4 +; CHECK-IC2-NEXT: [[TMP35:%.*]] = insertelement <2 x float> poison, float [[TMP34]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC2-NEXT: [[TMP36:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP35]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC2-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP37]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] +; CHECK-IC2: [[PRED_LOAD_IF7]]: +; CHECK-IC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP38]], align 4 +; CHECK-IC2-NEXT: [[TMP40:%.*]] = insertelement <2 x float> [[TMP36]], float [[TMP39]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE8]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE8]]: +; CHECK-IC2-NEXT: [[TMP41:%.*]] = phi <2 x float> [ [[TMP36]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP40]], %[[PRED_LOAD_IF7]] ] +; CHECK-IC2-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP18]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP42]], label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10:.*]] +; CHECK-IC2: [[PRED_LOAD_IF9]]: +; CHECK-IC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP43]], align 4 +; CHECK-IC2-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float 
[[TMP44]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE10]]: +; CHECK-IC2-NEXT: [[TMP46:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP45]], %[[PRED_LOAD_IF9]] ] +; CHECK-IC2-NEXT: [[TMP47:%.*]] = extractelement <2 x i1> [[TMP18]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP47]], label %[[PRED_LOAD_IF11:.*]], label %[[PRED_LOAD_CONTINUE12:.*]] +; CHECK-IC2: [[PRED_LOAD_IF11]]: +; CHECK-IC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP49:%.*]] = load float, ptr [[TMP48]], align 4 +; CHECK-IC2-NEXT: [[TMP50:%.*]] = insertelement <2 x float> [[TMP46]], float [[TMP49]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE12]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE12]]: +; CHECK-IC2-NEXT: [[TMP51:%.*]] = phi <2 x float> [ [[TMP46]], %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP50]], %[[PRED_LOAD_IF11]] ] +; CHECK-IC2-NEXT: [[TMP52:%.*]] = extractelement <2 x i1> [[TMP19]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF13:.*]], label %[[PRED_LOAD_CONTINUE14:.*]] +; CHECK-IC2: [[PRED_LOAD_IF13]]: +; CHECK-IC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP53]], align 4 +; CHECK-IC2-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP54]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE14]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE14]]: +; CHECK-IC2-NEXT: [[TMP56:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE12]] ], [ [[TMP55]], %[[PRED_LOAD_IF13]] ] +; CHECK-IC2-NEXT: [[TMP57:%.*]] = extractelement <2 x i1> [[TMP19]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP57]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; CHECK-IC2: [[PRED_LOAD_IF15]]: +; CHECK-IC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP59:%.*]] = load float, ptr [[TMP58]], align 4 +; CHECK-IC2-NEXT: 
[[TMP60:%.*]] = insertelement <2 x float> [[TMP56]], float [[TMP59]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE16]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE16]]: +; CHECK-IC2-NEXT: [[TMP61:%.*]] = phi <2 x float> [ [[TMP56]], %[[PRED_LOAD_CONTINUE14]] ], [ [[TMP60]], %[[PRED_LOAD_IF15]] ] +; CHECK-IC2-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x float> [[TMP31]], <2 x float> [[TMP51]] +; CHECK-IC2-NEXT: [[PREDPHI17:%.*]] = select <2 x i1> [[TMP21]], <2 x float> [[TMP41]], <2 x float> [[TMP61]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH18]] +; CHECK-IC2: [[LOOP_LATCH18]]: +; CHECK-IC2-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ [[PREDPHI]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP11]], %[[IF_FALSE1]] ] +; CHECK-IC2-NEXT: [[VEC_PHI19:%.*]] = phi <2 x float> [ [[PREDPHI17]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP15]], %[[IF_FALSE1]] ] +; CHECK-IC2-NEXT: [[TMP62:%.*]] = fmul <2 x float> [[VEC_PHI]], [[VEC_PHI]] +; CHECK-IC2-NEXT: [[TMP63:%.*]] = fmul <2 x float> [[VEC_PHI19]], [[VEC_PHI19]] +; CHECK-IC2-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP0]] +; CHECK-IC2-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 0 +; CHECK-IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[TMP62]], ptr [[TMP65]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[TMP63]], ptr [[TMP66]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 
[ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE]]: +; CHECK-IC2-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC2-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC2-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE_TRUE]]: +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_TRUE_FALSE]]: +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_FALSE]]: +; CHECK-IC2-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC2-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[LOAD:%.*]] = phi float [ [[C_LOAD]], %[[IF_FALSE]] ], [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ] +; CHECK-IC2-NEXT: [[MUL:%.*]] = fmul float [[LOAD]], [[LOAD]] +; CHECK-IC2-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC2-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_cond, ptr %A, ptr %B, ptr 
%C, ptr noalias %D) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + br i1 %uniform_cond, label %if.true, label %if.false + +if.true: + %mod = urem i64 %i, 3 + %divergant_cond = icmp eq i64 %mod, 0 + br i1 %divergant_cond, label %if.true.true, label %if.true.false + +if.true.true: + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.load = load float, ptr %a.addr, align 4 + br label %if.true.join + +if.true.false: + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + %b.load = load float, ptr %b.addr, align 4 + br label %if.true.join + +if.true.join: + %a.or.b.load = phi float [ %a.load, %if.true.true ], [ %b.load, %if.true.false ] + br label %loop.latch + +if.false: + %c.addr = getelementptr inbounds float, ptr %C, i64 %i + %c.load = load float, ptr %c.addr, align 4 + br label %loop.latch + +loop.latch: + %load = phi float [ %a.or.b.load, %if.true.join ], [ %c.load, %if.false ] + %mul = fmul float %load, %load + %d.addr = getelementptr inbounds float, ptr %D, i64 %i + store float %mul, ptr %d.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +; Here, the loop.latch block has three predecessors, and currently, +; this mean that no uniform branches that join in that block can be +; kept (it would require mixing phis and blends, it is possible but +; not yet implemented). 
+define void @test_no_dedicated_join_block( +; CHECK-IC1-LABEL: define void @test_no_dedicated_join_block( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[UNIFORM_COND]], i64 0 +; CHECK-IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-IC1-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10:.*]] ] +; CHECK-IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10]] ] +; CHECK-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC1: [[PRED_LOAD_IF]]: +; CHECK-IC1-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP4]], align 4 +; CHECK-IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC1-NEXT: [[TMP7:%.*]] = phi <2 x float> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; 
CHECK-IC1-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK-IC1: [[PRED_LOAD_IF1]]: +; CHECK-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP2]] +; CHECK-IC1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-IC1-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP10]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE2]]: +; CHECK-IC1-NEXT: [[TMP12:%.*]] = phi <2 x float> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], %[[PRED_LOAD_IF1]] ] +; CHECK-IC1-NEXT: [[TMP13:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC1-NEXT: [[TMP14:%.*]] = icmp eq <2 x i64> [[TMP13]], zeroinitializer +; CHECK-IC1-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP14]], splat (i1 true) +; CHECK-IC1-NEXT: [[TMP16:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer +; CHECK-IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP17]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC1: [[PRED_LOAD_IF3]]: +; CHECK-IC1-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP19:%.*]] = load float, ptr [[TMP18]], align 4 +; CHECK-IC1-NEXT: [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP19]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC1-NEXT: [[TMP21:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC1-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC1: [[PRED_LOAD_IF5]]: +; CHECK-IC1-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] +; 
CHECK-IC1-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP23]], align 4 +; CHECK-IC1-NEXT: [[TMP25:%.*]] = insertelement <2 x float> [[TMP21]], float [[TMP24]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC1-NEXT: [[TMP26:%.*]] = phi <2 x float> [ [[TMP21]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP25]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC1-NEXT: [[TMP27:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP14]], <2 x i1> zeroinitializer +; CHECK-IC1-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP27]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] +; CHECK-IC1: [[PRED_LOAD_IF7]]: +; CHECK-IC1-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP29]], align 4 +; CHECK-IC1-NEXT: [[TMP31:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE8]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE8]]: +; CHECK-IC1-NEXT: [[TMP32:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP31]], %[[PRED_LOAD_IF7]] ] +; CHECK-IC1-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP27]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP33]], label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC1: [[PRED_LOAD_IF9]]: +; CHECK-IC1-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC1-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4 +; CHECK-IC1-NEXT: [[TMP36:%.*]] = insertelement <2 x float> [[TMP32]], float [[TMP35]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE10]]: +; CHECK-IC1-NEXT: [[TMP37:%.*]] = phi <2 x float> [ [[TMP32]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], %[[PRED_LOAD_IF9]] ] +; CHECK-IC1-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP16]], <2 x float> [[TMP26]], <2 x float> [[TMP37]] +; CHECK-IC1-NEXT: [[PREDPHI11:%.*]] = 
select <2 x i1> [[TMP0]], <2 x float> [[TMP12]], <2 x float> [[PREDPHI]] +; CHECK-IC1-NEXT: [[TMP38:%.*]] = fmul <2 x float> [[PREDPHI11]], [[PREDPHI11]] +; CHECK-IC1-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP1]] +; CHECK-IC1-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[TMP39]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[TMP38]], ptr [[TMP40]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC1-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE]]: +; CHECK-IC1-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC1-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC1-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE_TRUE]]: +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_TRUE_FALSE]]: +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; 
CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_FALSE]]: +; CHECK-IC1-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC1-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[LOAD:%.*]] = phi float [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ], [ [[C_LOAD]], %[[IF_FALSE]] ] +; CHECK-IC1-NEXT: [[MUL:%.*]] = fmul float [[LOAD]], [[LOAD]] +; CHECK-IC1-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC1-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; CHECK-IC2-LABEL: define void @test_no_dedicated_join_block( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[UNIFORM_COND]], i64 0 +; CHECK-IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-IC2-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE22:.*]] 
] +; CHECK-IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE22]] ] +; CHECK-IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 +; CHECK-IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 +; CHECK-IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC2: [[PRED_LOAD_IF]]: +; CHECK-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP6]], align 4 +; CHECK-IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ] +; CHECK-IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK-IC2: [[PRED_LOAD_IF1]]: +; CHECK-IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +; CHECK-IC2-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP12]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE2]]: +; CHECK-IC2-NEXT: [[TMP14:%.*]] = phi <2 x float> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], %[[PRED_LOAD_IF1]] ] +; CHECK-IC2-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP15]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC2: [[PRED_LOAD_IF3]]: +; CHECK-IC2-NEXT: [[TMP16:%.*]] = 
getelementptr inbounds float, ptr [[C]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 +; CHECK-IC2-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP17]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC2-NEXT: [[TMP19:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC2-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC2: [[PRED_LOAD_IF5]]: +; CHECK-IC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP4]] +; CHECK-IC2-NEXT: [[TMP22:%.*]] = load float, ptr [[TMP21]], align 4 +; CHECK-IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP22]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC2-NEXT: [[TMP24:%.*]] = phi <2 x float> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC2-NEXT: [[TMP25:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP26:%.*]] = urem <2 x i64> [[STEP_ADD]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP27:%.*]] = icmp eq <2 x i64> [[TMP25]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP28:%.*]] = icmp eq <2 x i64> [[TMP26]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP29:%.*]] = xor <2 x i1> [[TMP27]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP30:%.*]] = xor <2 x i1> [[TMP28]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP31:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP32:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP30]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP31]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP33]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] +; CHECK-IC2: 
[[PRED_LOAD_IF7]]: +; CHECK-IC2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP34]], align 4 +; CHECK-IC2-NEXT: [[TMP36:%.*]] = insertelement <2 x float> poison, float [[TMP35]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE8]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE8]]: +; CHECK-IC2-NEXT: [[TMP37:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP36]], %[[PRED_LOAD_IF7]] ] +; CHECK-IC2-NEXT: [[TMP38:%.*]] = extractelement <2 x i1> [[TMP31]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP38]], label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10:.*]] +; CHECK-IC2: [[PRED_LOAD_IF9]]: +; CHECK-IC2-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP39]], align 4 +; CHECK-IC2-NEXT: [[TMP41:%.*]] = insertelement <2 x float> [[TMP37]], float [[TMP40]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE10]]: +; CHECK-IC2-NEXT: [[TMP42:%.*]] = phi <2 x float> [ [[TMP37]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP41]], %[[PRED_LOAD_IF9]] ] +; CHECK-IC2-NEXT: [[TMP43:%.*]] = extractelement <2 x i1> [[TMP32]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP43]], label %[[PRED_LOAD_IF11:.*]], label %[[PRED_LOAD_CONTINUE12:.*]] +; CHECK-IC2: [[PRED_LOAD_IF11]]: +; CHECK-IC2-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP45:%.*]] = load float, ptr [[TMP44]], align 4 +; CHECK-IC2-NEXT: [[TMP46:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE12]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE12]]: +; CHECK-IC2-NEXT: [[TMP47:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP46]], %[[PRED_LOAD_IF11]] ] +; CHECK-IC2-NEXT: [[TMP48:%.*]] = extractelement <2 x i1> [[TMP32]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP48]], label %[[PRED_LOAD_IF13:.*]], label 
%[[PRED_LOAD_CONTINUE14:.*]] +; CHECK-IC2: [[PRED_LOAD_IF13]]: +; CHECK-IC2-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] +; CHECK-IC2-NEXT: [[TMP50:%.*]] = load float, ptr [[TMP49]], align 4 +; CHECK-IC2-NEXT: [[TMP51:%.*]] = insertelement <2 x float> [[TMP47]], float [[TMP50]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE14]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE14]]: +; CHECK-IC2-NEXT: [[TMP52:%.*]] = phi <2 x float> [ [[TMP47]], %[[PRED_LOAD_CONTINUE12]] ], [ [[TMP51]], %[[PRED_LOAD_IF13]] ] +; CHECK-IC2-NEXT: [[TMP53:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP27]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP54:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i1> [[TMP28]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP55:%.*]] = extractelement <2 x i1> [[TMP53]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP55]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; CHECK-IC2: [[PRED_LOAD_IF15]]: +; CHECK-IC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP57:%.*]] = load float, ptr [[TMP56]], align 4 +; CHECK-IC2-NEXT: [[TMP58:%.*]] = insertelement <2 x float> poison, float [[TMP57]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE16]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE16]]: +; CHECK-IC2-NEXT: [[TMP59:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE14]] ], [ [[TMP58]], %[[PRED_LOAD_IF15]] ] +; CHECK-IC2-NEXT: [[TMP60:%.*]] = extractelement <2 x i1> [[TMP53]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP60]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]] +; CHECK-IC2: [[PRED_LOAD_IF17]]: +; CHECK-IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP62:%.*]] = load float, ptr [[TMP61]], align 4 +; CHECK-IC2-NEXT: [[TMP63:%.*]] = insertelement <2 x float> [[TMP59]], float [[TMP62]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE18]] +; CHECK-IC2: 
[[PRED_LOAD_CONTINUE18]]: +; CHECK-IC2-NEXT: [[TMP64:%.*]] = phi <2 x float> [ [[TMP59]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP63]], %[[PRED_LOAD_IF17]] ] +; CHECK-IC2-NEXT: [[TMP65:%.*]] = extractelement <2 x i1> [[TMP54]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP65]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]] +; CHECK-IC2: [[PRED_LOAD_IF19]]: +; CHECK-IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP67:%.*]] = load float, ptr [[TMP66]], align 4 +; CHECK-IC2-NEXT: [[TMP68:%.*]] = insertelement <2 x float> poison, float [[TMP67]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE20]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE20]]: +; CHECK-IC2-NEXT: [[TMP69:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP68]], %[[PRED_LOAD_IF19]] ] +; CHECK-IC2-NEXT: [[TMP70:%.*]] = extractelement <2 x i1> [[TMP54]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22]] +; CHECK-IC2: [[PRED_LOAD_IF21]]: +; CHECK-IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] +; CHECK-IC2-NEXT: [[TMP72:%.*]] = load float, ptr [[TMP71]], align 4 +; CHECK-IC2-NEXT: [[TMP73:%.*]] = insertelement <2 x float> [[TMP69]], float [[TMP72]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE22]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE22]]: +; CHECK-IC2-NEXT: [[TMP74:%.*]] = phi <2 x float> [ [[TMP69]], %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP73]], %[[PRED_LOAD_IF21]] ] +; CHECK-IC2-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP31]], <2 x float> [[TMP42]], <2 x float> [[TMP64]] +; CHECK-IC2-NEXT: [[PREDPHI23:%.*]] = select <2 x i1> [[TMP0]], <2 x float> [[TMP14]], <2 x float> [[PREDPHI]] +; CHECK-IC2-NEXT: [[PREDPHI24:%.*]] = select <2 x i1> [[TMP32]], <2 x float> [[TMP52]], <2 x float> [[TMP74]] +; CHECK-IC2-NEXT: [[PREDPHI25:%.*]] = select <2 x i1> [[TMP0]], <2 x float> [[TMP24]], <2 x float> [[PREDPHI24]] +; CHECK-IC2-NEXT: [[TMP75:%.*]] = 
fmul <2 x float> [[PREDPHI23]], [[PREDPHI23]] +; CHECK-IC2-NEXT: [[TMP76:%.*]] = fmul <2 x float> [[PREDPHI25]], [[PREDPHI25]] +; CHECK-IC2-NEXT: [[TMP77:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP1]] +; CHECK-IC2-NEXT: [[TMP78:%.*]] = getelementptr inbounds float, ptr [[TMP77]], i32 0 +; CHECK-IC2-NEXT: [[TMP79:%.*]] = getelementptr inbounds float, ptr [[TMP77]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[TMP75]], ptr [[TMP78]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[TMP76]], ptr [[TMP79]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE]]: +; CHECK-IC2-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC2-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC2-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE_TRUE]]: +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_TRUE_FALSE]]: +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] 
= getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_FALSE]]: +; CHECK-IC2-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC2-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[LOAD:%.*]] = phi float [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ], [ [[C_LOAD]], %[[IF_FALSE]] ] +; CHECK-IC2-NEXT: [[MUL:%.*]] = fmul float [[LOAD]], [[LOAD]] +; CHECK-IC2-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC2-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_cond, ptr %A, ptr %B, ptr %C, ptr noalias %D) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ] + br i1 %uniform_cond, label %if.true, label %if.false + +if.true: + %mod = urem i64 %i, 3 + %divergant_cond = icmp eq i64 %mod, 0 + br i1 %divergant_cond, label %if.true.true, label %if.true.false + +if.true.true: + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.load = load float, ptr %a.addr, align 4 + br label %loop.latch + +if.true.false: + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + %b.load = load float, ptr %b.addr, align 4 + br label %loop.latch + +if.false: + %c.addr = getelementptr inbounds float, ptr %C, i64 %i + %c.load = load float, ptr %c.addr, align 4 + br label %loop.latch + +loop.latch: + %load = phi float [ %a.load, %if.true.true ], [ %b.load, %if.true.false ], [ %c.load, %if.false ] + %mul = fmul float 
%load, %load + %d.addr = getelementptr inbounds float, ptr %D, i64 %i + store float %mul, ptr %d.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +; Here, the two sides of the branch at the head of loop.header do not form +; separate single-entry single-exit subregions because of an edge from +; if.true.true to if.false. +define void @test_not_single_entry_single_exit( +; CHECK-IC1-LABEL: define void @test_not_single_entry_single_exit( +; CHECK-IC1-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND1:%.*]], i1 [[UNIFORM_COND2:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC1-NEXT: [[ENTRY:.*]]: +; CHECK-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC1: [[VECTOR_PH]]: +; CHECK-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[UNIFORM_COND2]], i64 0 +; CHECK-IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-IC1-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-IC1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[UNIFORM_COND1]], i64 0 +; CHECK-IC1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-IC1-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC1: [[VECTOR_BODY]]: +; CHECK-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE12:.*]] ] +; CHECK-IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ 
[[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE12]] ] +; CHECK-IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC1-NEXT: [[TMP4:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC1-NEXT: [[TMP5:%.*]] = icmp eq <2 x i64> [[TMP4]], zeroinitializer +; CHECK-IC1-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[TMP5]], splat (i1 true) +; CHECK-IC1-NEXT: [[TMP7:%.*]] = select <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer +; CHECK-IC1-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC1: [[PRED_LOAD_IF]]: +; CHECK-IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] +; CHECK-IC1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-IC1-NEXT: [[TMP11:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC1-NEXT: [[TMP12:%.*]] = phi <2 x float> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ] +; CHECK-IC1-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC1: [[PRED_LOAD_IF3]]: +; CHECK-IC1-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] +; CHECK-IC1-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4 +; CHECK-IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP15]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC1-NEXT: [[TMP17:%.*]] = phi <2 x float> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC1-NEXT: [[TMP18:%.*]] = select <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer +; CHECK-IC1-NEXT: [[TMP19:%.*]] = 
extractelement <2 x i1> [[TMP18]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC1: [[PRED_LOAD_IF5]]: +; CHECK-IC1-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP20]], align 4 +; CHECK-IC1-NEXT: [[TMP22:%.*]] = insertelement <2 x float> poison, float [[TMP21]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC1-NEXT: [[TMP23:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC1-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP18]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP24]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] +; CHECK-IC1: [[PRED_LOAD_IF7]]: +; CHECK-IC1-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4 +; CHECK-IC1-NEXT: [[TMP27:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP26]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE8]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE8]]: +; CHECK-IC1-NEXT: [[TMP28:%.*]] = phi <2 x float> [ [[TMP23]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], %[[PRED_LOAD_IF7]] ] +; CHECK-IC1-NEXT: [[TMP29:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP0]], <2 x i1> zeroinitializer +; CHECK-IC1-NEXT: [[TMP30:%.*]] = or <2 x i1> [[TMP29]], [[TMP1]] +; CHECK-IC1-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> zeroinitializer, <2 x float> [[TMP28]] +; CHECK-IC1-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0 +; CHECK-IC1-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10:.*]] +; CHECK-IC1: [[PRED_LOAD_IF9]]: +; CHECK-IC1-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP2]] +; CHECK-IC1-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4 +; CHECK-IC1-NEXT: 
[[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP33]], i32 0 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE10]]: +; CHECK-IC1-NEXT: [[TMP35:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP34]], %[[PRED_LOAD_IF9]] ] +; CHECK-IC1-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP30]], i32 1 +; CHECK-IC1-NEXT: br i1 [[TMP36]], label %[[PRED_LOAD_IF11:.*]], label %[[PRED_LOAD_CONTINUE12]] +; CHECK-IC1: [[PRED_LOAD_IF11]]: +; CHECK-IC1-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP3]] +; CHECK-IC1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP37]], align 4 +; CHECK-IC1-NEXT: [[TMP39:%.*]] = insertelement <2 x float> [[TMP35]], float [[TMP38]], i32 1 +; CHECK-IC1-NEXT: br label %[[PRED_LOAD_CONTINUE12]] +; CHECK-IC1: [[PRED_LOAD_CONTINUE12]]: +; CHECK-IC1-NEXT: [[TMP40:%.*]] = phi <2 x float> [ [[TMP35]], %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP39]], %[[PRED_LOAD_IF11]] ] +; CHECK-IC1-NEXT: [[TMP41:%.*]] = fadd <2 x float> [[TMP40]], [[PREDPHI]] +; CHECK-IC1-NEXT: [[PREDPHI13:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP17]], <2 x float> [[TMP28]] +; CHECK-IC1-NEXT: [[PREDPHI14:%.*]] = select <2 x i1> [[TMP30]], <2 x float> [[TMP41]], <2 x float> [[PREDPHI13]] +; CHECK-IC1-NEXT: [[TMP42:%.*]] = fmul <2 x float> [[PREDPHI14]], [[PREDPHI14]] +; CHECK-IC1-NEXT: [[TMP43:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP2]] +; CHECK-IC1-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[TMP43]], i32 0 +; CHECK-IC1-NEXT: store <2 x float> [[TMP42]], ptr [[TMP44]], align 4 +; CHECK-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-IC1-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-IC1-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-IC1: [[MIDDLE_BLOCK]]: +; CHECK-IC1-NEXT: [[CMP_N:%.*]] = 
icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC1: [[SCALAR_PH]]: +; CHECK-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC1-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC1: [[LOOP_HEADER]]: +; CHECK-IC1-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND1]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE]]: +; CHECK-IC1-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC1-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC1-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC1: [[IF_TRUE_TRUE]]: +; CHECK-IC1-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC1-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC1-NEXT: br i1 [[UNIFORM_COND2]], label %[[LOOP_LATCH]], label %[[IF_FALSE]] +; CHECK-IC1: [[IF_TRUE_FALSE]]: +; CHECK-IC1-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC1-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[IF_FALSE]]: +; CHECK-IC1-NEXT: [[A_OR_ZERO:%.*]] = phi float [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ 0.000000e+00, %[[LOOP_HEADER]] ] +; CHECK-IC1-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC1-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC1-NEXT: [[C_VAL:%.*]] = fadd float [[C_LOAD]], [[A_OR_ZERO]] +; CHECK-IC1-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC1: [[LOOP_LATCH]]: +; CHECK-IC1-NEXT: [[VAL:%.*]] = phi float [ [[C_VAL]], %[[IF_FALSE]] ], [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ] +; CHECK-IC1-NEXT: [[MUL:%.*]] = fmul float [[VAL]], [[VAL]] +; CHECK-IC1-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds 
float, ptr [[D]], i64 [[I]] +; CHECK-IC1-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC1-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-IC1: [[EXIT]]: +; CHECK-IC1-NEXT: ret void +; +; CHECK-IC2-LABEL: define void @test_not_single_entry_single_exit( +; CHECK-IC2-SAME: i64 [[N:%.*]], i1 [[UNIFORM_COND1:%.*]], i1 [[UNIFORM_COND2:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr noalias [[D:%.*]]) { +; CHECK-IC2-NEXT: [[ENTRY:.*]]: +; CHECK-IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-IC2: [[VECTOR_PH]]: +; CHECK-IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[UNIFORM_COND2]], i64 0 +; CHECK-IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-IC2-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-IC2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[UNIFORM_COND1]], i64 0 +; CHECK-IC2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-IC2-NEXT: [[TMP1:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-IC2: [[VECTOR_BODY]]: +; CHECK-IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE25:.*]] ] +; CHECK-IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE25]] ] +; CHECK-IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat 
(i64 2) +; CHECK-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; CHECK-IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; CHECK-IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 +; CHECK-IC2-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 3 +; CHECK-IC2-NEXT: [[TMP6:%.*]] = urem <2 x i64> [[VEC_IND]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP7:%.*]] = urem <2 x i64> [[STEP_ADD]], splat (i64 3) +; CHECK-IC2-NEXT: [[TMP8:%.*]] = icmp eq <2 x i64> [[TMP6]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP9:%.*]] = icmp eq <2 x i64> [[TMP7]], zeroinitializer +; CHECK-IC2-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-IC2-NEXT: [[TMP12:%.*]] = select <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP13:%.*]] = select <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-IC2: [[PRED_LOAD_IF]]: +; CHECK-IC2-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 +; CHECK-IC2-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP16]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE]]: +; CHECK-IC2-NEXT: [[TMP18:%.*]] = phi <2 x float> [ poison, %[[VECTOR_BODY]] ], [ [[TMP17]], %[[PRED_LOAD_IF]] ] +; CHECK-IC2-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK-IC2: [[PRED_LOAD_IF3]]: +; CHECK-IC2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP20]], align 4 +; CHECK-IC2-NEXT: [[TMP22:%.*]] = insertelement <2 
x float> [[TMP18]], float [[TMP21]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE4]]: +; CHECK-IC2-NEXT: [[TMP23:%.*]] = phi <2 x float> [ [[TMP18]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP22]], %[[PRED_LOAD_IF3]] ] +; CHECK-IC2-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP24]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]] +; CHECK-IC2: [[PRED_LOAD_IF5]]: +; CHECK-IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] +; CHECK-IC2-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4 +; CHECK-IC2-NEXT: [[TMP27:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE6]]: +; CHECK-IC2-NEXT: [[TMP28:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], %[[PRED_LOAD_IF5]] ] +; CHECK-IC2-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP29]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] +; CHECK-IC2: [[PRED_LOAD_IF7]]: +; CHECK-IC2-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] +; CHECK-IC2-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP30]], align 4 +; CHECK-IC2-NEXT: [[TMP32:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP31]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE8]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE8]]: +; CHECK-IC2-NEXT: [[TMP33:%.*]] = phi <2 x float> [ [[TMP28]], %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP32]], %[[PRED_LOAD_IF7]] ] +; CHECK-IC2-NEXT: [[TMP34:%.*]] = select <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP35:%.*]] = select <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> [[TMP9]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP34]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP36]], label %[[PRED_LOAD_IF9:.*]], label 
%[[PRED_LOAD_CONTINUE10:.*]] +; CHECK-IC2: [[PRED_LOAD_IF9]]: +; CHECK-IC2-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP37]], align 4 +; CHECK-IC2-NEXT: [[TMP39:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE10]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE10]]: +; CHECK-IC2-NEXT: [[TMP40:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP39]], %[[PRED_LOAD_IF9]] ] +; CHECK-IC2-NEXT: [[TMP41:%.*]] = extractelement <2 x i1> [[TMP34]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF11:.*]], label %[[PRED_LOAD_CONTINUE12:.*]] +; CHECK-IC2: [[PRED_LOAD_IF11]]: +; CHECK-IC2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP42]], align 4 +; CHECK-IC2-NEXT: [[TMP44:%.*]] = insertelement <2 x float> [[TMP40]], float [[TMP43]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE12]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE12]]: +; CHECK-IC2-NEXT: [[TMP45:%.*]] = phi <2 x float> [ [[TMP40]], %[[PRED_LOAD_CONTINUE10]] ], [ [[TMP44]], %[[PRED_LOAD_IF11]] ] +; CHECK-IC2-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP35]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF13:.*]], label %[[PRED_LOAD_CONTINUE14:.*]] +; CHECK-IC2: [[PRED_LOAD_IF13]]: +; CHECK-IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] +; CHECK-IC2-NEXT: [[TMP48:%.*]] = load float, ptr [[TMP47]], align 4 +; CHECK-IC2-NEXT: [[TMP49:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE14]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE14]]: +; CHECK-IC2-NEXT: [[TMP50:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE12]] ], [ [[TMP49]], %[[PRED_LOAD_IF13]] ] +; CHECK-IC2-NEXT: [[TMP51:%.*]] = extractelement <2 x i1> [[TMP35]], i32 1 +; CHECK-IC2-NEXT: br i1 
[[TMP51]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]] +; CHECK-IC2: [[PRED_LOAD_IF15]]: +; CHECK-IC2-NEXT: [[TMP52:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] +; CHECK-IC2-NEXT: [[TMP53:%.*]] = load float, ptr [[TMP52]], align 4 +; CHECK-IC2-NEXT: [[TMP54:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP53]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE16]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE16]]: +; CHECK-IC2-NEXT: [[TMP55:%.*]] = phi <2 x float> [ [[TMP50]], %[[PRED_LOAD_CONTINUE14]] ], [ [[TMP54]], %[[PRED_LOAD_IF15]] ] +; CHECK-IC2-NEXT: [[TMP56:%.*]] = select <2 x i1> [[TMP34]], <2 x i1> [[TMP0]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP57:%.*]] = select <2 x i1> [[TMP35]], <2 x i1> [[TMP0]], <2 x i1> zeroinitializer +; CHECK-IC2-NEXT: [[TMP58:%.*]] = or <2 x i1> [[TMP56]], [[TMP1]] +; CHECK-IC2-NEXT: [[TMP59:%.*]] = or <2 x i1> [[TMP57]], [[TMP1]] +; CHECK-IC2-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> zeroinitializer, <2 x float> [[TMP45]] +; CHECK-IC2-NEXT: [[PREDPHI17:%.*]] = select <2 x i1> [[TMP1]], <2 x float> zeroinitializer, <2 x float> [[TMP55]] +; CHECK-IC2-NEXT: [[TMP60:%.*]] = extractelement <2 x i1> [[TMP58]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP60]], label %[[PRED_LOAD_IF18:.*]], label %[[PRED_LOAD_CONTINUE19:.*]] +; CHECK-IC2: [[PRED_LOAD_IF18]]: +; CHECK-IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP62:%.*]] = load float, ptr [[TMP61]], align 4 +; CHECK-IC2-NEXT: [[TMP63:%.*]] = insertelement <2 x float> poison, float [[TMP62]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE19]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE19]]: +; CHECK-IC2-NEXT: [[TMP64:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP63]], %[[PRED_LOAD_IF18]] ] +; CHECK-IC2-NEXT: [[TMP65:%.*]] = extractelement <2 x i1> [[TMP58]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP65]], label %[[PRED_LOAD_IF20:.*]], label 
%[[PRED_LOAD_CONTINUE21:.*]] +; CHECK-IC2: [[PRED_LOAD_IF20]]: +; CHECK-IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP3]] +; CHECK-IC2-NEXT: [[TMP67:%.*]] = load float, ptr [[TMP66]], align 4 +; CHECK-IC2-NEXT: [[TMP68:%.*]] = insertelement <2 x float> [[TMP64]], float [[TMP67]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE21]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE21]]: +; CHECK-IC2-NEXT: [[TMP69:%.*]] = phi <2 x float> [ [[TMP64]], %[[PRED_LOAD_CONTINUE19]] ], [ [[TMP68]], %[[PRED_LOAD_IF20]] ] +; CHECK-IC2-NEXT: [[TMP70:%.*]] = extractelement <2 x i1> [[TMP59]], i32 0 +; CHECK-IC2-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF22:.*]], label %[[PRED_LOAD_CONTINUE23:.*]] +; CHECK-IC2: [[PRED_LOAD_IF22]]: +; CHECK-IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP4]] +; CHECK-IC2-NEXT: [[TMP72:%.*]] = load float, ptr [[TMP71]], align 4 +; CHECK-IC2-NEXT: [[TMP73:%.*]] = insertelement <2 x float> poison, float [[TMP72]], i32 0 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE23]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE23]]: +; CHECK-IC2-NEXT: [[TMP74:%.*]] = phi <2 x float> [ poison, %[[PRED_LOAD_CONTINUE21]] ], [ [[TMP73]], %[[PRED_LOAD_IF22]] ] +; CHECK-IC2-NEXT: [[TMP75:%.*]] = extractelement <2 x i1> [[TMP59]], i32 1 +; CHECK-IC2-NEXT: br i1 [[TMP75]], label %[[PRED_LOAD_IF24:.*]], label %[[PRED_LOAD_CONTINUE25]] +; CHECK-IC2: [[PRED_LOAD_IF24]]: +; CHECK-IC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP5]] +; CHECK-IC2-NEXT: [[TMP77:%.*]] = load float, ptr [[TMP76]], align 4 +; CHECK-IC2-NEXT: [[TMP78:%.*]] = insertelement <2 x float> [[TMP74]], float [[TMP77]], i32 1 +; CHECK-IC2-NEXT: br label %[[PRED_LOAD_CONTINUE25]] +; CHECK-IC2: [[PRED_LOAD_CONTINUE25]]: +; CHECK-IC2-NEXT: [[TMP79:%.*]] = phi <2 x float> [ [[TMP74]], %[[PRED_LOAD_CONTINUE23]] ], [ [[TMP78]], %[[PRED_LOAD_IF24]] ] +; CHECK-IC2-NEXT: [[TMP80:%.*]] = fadd <2 x float> [[TMP69]], [[PREDPHI]] +; CHECK-IC2-NEXT: 
[[TMP81:%.*]] = fadd <2 x float> [[TMP79]], [[PREDPHI17]] +; CHECK-IC2-NEXT: [[PREDPHI26:%.*]] = select <2 x i1> [[TMP12]], <2 x float> [[TMP23]], <2 x float> [[TMP45]] +; CHECK-IC2-NEXT: [[PREDPHI27:%.*]] = select <2 x i1> [[TMP13]], <2 x float> [[TMP33]], <2 x float> [[TMP55]] +; CHECK-IC2-NEXT: [[PREDPHI28:%.*]] = select <2 x i1> [[TMP58]], <2 x float> [[TMP80]], <2 x float> [[PREDPHI26]] +; CHECK-IC2-NEXT: [[PREDPHI29:%.*]] = select <2 x i1> [[TMP59]], <2 x float> [[TMP81]], <2 x float> [[PREDPHI27]] +; CHECK-IC2-NEXT: [[TMP82:%.*]] = fmul <2 x float> [[PREDPHI28]], [[PREDPHI28]] +; CHECK-IC2-NEXT: [[TMP83:%.*]] = fmul <2 x float> [[PREDPHI29]], [[PREDPHI29]] +; CHECK-IC2-NEXT: [[TMP84:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[TMP2]] +; CHECK-IC2-NEXT: [[TMP85:%.*]] = getelementptr inbounds float, ptr [[TMP84]], i32 0 +; CHECK-IC2-NEXT: [[TMP86:%.*]] = getelementptr inbounds float, ptr [[TMP84]], i32 2 +; CHECK-IC2-NEXT: store <2 x float> [[TMP82]], ptr [[TMP85]], align 4 +; CHECK-IC2-NEXT: store <2 x float> [[TMP83]], ptr [[TMP86]], align 4 +; CHECK-IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-IC2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-IC2-NEXT: [[TMP87:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[TMP87]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-IC2: [[MIDDLE_BLOCK]]: +; CHECK-IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-IC2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-IC2: [[SCALAR_PH]]: +; CHECK-IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-IC2-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK-IC2: [[LOOP_HEADER]]: +; CHECK-IC2-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND1]], label %[[IF_TRUE:.*]], label %[[IF_FALSE:.*]] 
+; CHECK-IC2: [[IF_TRUE]]: +; CHECK-IC2-NEXT: [[MOD:%.*]] = urem i64 [[I]], 3 +; CHECK-IC2-NEXT: [[DIVERGANT_COND:%.*]] = icmp eq i64 [[MOD]], 0 +; CHECK-IC2-NEXT: br i1 [[DIVERGANT_COND]], label %[[IF_TRUE_TRUE:.*]], label %[[IF_TRUE_FALSE:.*]] +; CHECK-IC2: [[IF_TRUE_TRUE]]: +; CHECK-IC2-NEXT: [[A_ADDR:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] +; CHECK-IC2-NEXT: [[A_LOAD:%.*]] = load float, ptr [[A_ADDR]], align 4 +; CHECK-IC2-NEXT: br i1 [[UNIFORM_COND2]], label %[[LOOP_LATCH]], label %[[IF_FALSE]] +; CHECK-IC2: [[IF_TRUE_FALSE]]: +; CHECK-IC2-NEXT: [[B_ADDR:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[I]] +; CHECK-IC2-NEXT: [[B_LOAD:%.*]] = load float, ptr [[B_ADDR]], align 4 +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[IF_FALSE]]: +; CHECK-IC2-NEXT: [[A_OR_ZERO:%.*]] = phi float [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ 0.000000e+00, %[[LOOP_HEADER]] ] +; CHECK-IC2-NEXT: [[C_ADDR:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[I]] +; CHECK-IC2-NEXT: [[C_LOAD:%.*]] = load float, ptr [[C_ADDR]], align 4 +; CHECK-IC2-NEXT: [[C_VAL:%.*]] = fadd float [[C_LOAD]], [[A_OR_ZERO]] +; CHECK-IC2-NEXT: br label %[[LOOP_LATCH]] +; CHECK-IC2: [[LOOP_LATCH]]: +; CHECK-IC2-NEXT: [[VAL:%.*]] = phi float [ [[C_VAL]], %[[IF_FALSE]] ], [ [[A_LOAD]], %[[IF_TRUE_TRUE]] ], [ [[B_LOAD]], %[[IF_TRUE_FALSE]] ] +; CHECK-IC2-NEXT: [[MUL:%.*]] = fmul float [[VAL]], [[VAL]] +; CHECK-IC2-NEXT: [[D_ADDR:%.*]] = getelementptr inbounds float, ptr [[D]], i64 [[I]] +; CHECK-IC2-NEXT: store float [[MUL]], ptr [[D_ADDR]], align 4 +; CHECK-IC2-NEXT: [[I_NEXT]] = add nuw i64 [[I]], 1 +; CHECK-IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-IC2: [[EXIT]]: +; CHECK-IC2-NEXT: ret void +; + i64 %N, i1 %uniform_cond1, i1 %uniform_cond2, ptr %A, ptr %B, ptr %C, ptr noalias %D) { +entry: + br label %loop.header + +loop.header: + %i = phi i64 [ 
0, %entry ], [ %i.next, %loop.latch ] + br i1 %uniform_cond1, label %if.true, label %if.false + +if.true: + %mod = urem i64 %i, 3 + %divergant_cond = icmp eq i64 %mod, 0 + br i1 %divergant_cond, label %if.true.true, label %if.true.false + +if.true.true: + %a.addr = getelementptr inbounds float, ptr %A, i64 %i + %a.load = load float, ptr %a.addr, align 4 + br i1 %uniform_cond2, label %if.true.join, label %if.false + +if.true.false: + %b.addr = getelementptr inbounds float, ptr %B, i64 %i + %b.load = load float, ptr %b.addr, align 4 + br label %if.true.join + +if.true.join: + %a.or.b.load = phi float [ %a.load, %if.true.true ], [ %b.load, %if.true.false ] + br label %loop.latch + +if.false: + %a.or.zero = phi float [ %a.load, %if.true.true ], [ 0.0, %loop.header ] + %c.addr = getelementptr inbounds float, ptr %C, i64 %i + %c.load = load float, ptr %c.addr, align 4 + %c.val = fadd float %c.load, %a.or.zero + br label %loop.latch + +loop.latch: + %val = phi float [ %a.or.b.load, %if.true.join ], [ %c.val, %if.false ] + %mul = fmul float %val, %val + %d.addr = getelementptr inbounds float, ptr %D, i64 %i + store float %mul, ptr %d.addr, align 4 + %i.next = add nuw i64 %i, 1 + %cmp = icmp eq i64 %i.next, %N + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} +;. 
+; CHECK-IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +; CHECK-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} +; CHECK-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} +;. 
+; CHECK-IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-IC2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK-IC2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK-IC2: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK-IC2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK-IC2: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK-IC2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK-IC2: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK-IC2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK-IC2: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK-IC2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK-IC2: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +; CHECK-IC2: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} +; CHECK-IC2: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]} +;.