diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 61b5ccd85bc6e..a48299f06455e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -528,11 +528,10 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPDerivedIVSC: case VPRecipeBase::VPEVLBasedIVPHISC: case VPRecipeBase::VPExpandSCEVSC: + case VPRecipeBase::VPExpressionSC: case VPRecipeBase::VPInstructionSC: case VPRecipeBase::VPReductionEVLSC: case VPRecipeBase::VPReductionSC: - case VPRecipeBase::VPMulAccumulateReductionSC: - case VPRecipeBase::VPExtendedReductionSC: case VPRecipeBase::VPReplicateSC: case VPRecipeBase::VPScalarIVStepsSC: case VPRecipeBase::VPVectorPointerSC: @@ -852,9 +851,7 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { R->getVPDefID() == VPRecipeBase::VPReductionEVLSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC || - R->getVPDefID() == VPRecipeBase::VPVectorPointerSC || - R->getVPDefID() == VPRecipeBase::VPExtendedReductionSC || - R->getVPDefID() == VPRecipeBase::VPMulAccumulateReductionSC; + R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; } static inline bool classof(const VPUser *U) { @@ -2440,28 +2437,6 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { setUnderlyingValue(I); } - /// For VPExtendedReductionRecipe. - /// Note that the debug location is from the extend. - VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind, - ArrayRef Operands, VPValue *CondOp, - bool IsOrdered, DebugLoc DL) - : VPRecipeWithIRFlags(SC, Operands, DL), RdxKind(RdxKind), - IsOrdered(IsOrdered), IsConditional(CondOp) { - if (CondOp) - addOperand(CondOp); - } - - /// For VPMulAccumulateReductionRecipe. - /// Note that the NUW/NSW flags and the debug location are from the Mul. 
- VPReductionRecipe(const unsigned char SC, const RecurKind RdxKind, - ArrayRef Operands, VPValue *CondOp, - bool IsOrdered, WrapFlagsTy WrapFlags, DebugLoc DL) - : VPRecipeWithIRFlags(SC, Operands, WrapFlags, DL), RdxKind(RdxKind), - IsOrdered(IsOrdered), IsConditional(CondOp) { - if (CondOp) - addOperand(CondOp); - } - public: VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, @@ -2487,9 +2462,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags { static inline bool classof(const VPRecipeBase *R) { return R->getVPDefID() == VPRecipeBase::VPReductionSC || - R->getVPDefID() == VPRecipeBase::VPReductionEVLSC || - R->getVPDefID() == VPRecipeBase::VPExtendedReductionSC || - R->getVPDefID() == VPRecipeBase::VPMulAccumulateReductionSC; + R->getVPDefID() == VPRecipeBase::VPReductionEVLSC; } static inline bool classof(const VPUser *U) { @@ -2628,190 +2601,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe { } }; -/// A recipe to represent inloop extended reduction operations, performing a -/// reduction on a extended vector operand into a scalar value, and adding the -/// result to a chain. This recipe is abstract and needs to be lowered to -/// concrete recipes before codegen. The operands are {ChainOp, VecOp, -/// [Condition]}. -class VPExtendedReductionRecipe : public VPReductionRecipe { - /// Opcode of the extend for VecOp. - Instruction::CastOps ExtOp; - - /// The scalar type after extending. - Type *ResultTy; - - /// For cloning VPExtendedReductionRecipe. 
- VPExtendedReductionRecipe(VPExtendedReductionRecipe *ExtRed) - : VPReductionRecipe( - VPDef::VPExtendedReductionSC, ExtRed->getRecurrenceKind(), - {ExtRed->getChainOp(), ExtRed->getVecOp()}, ExtRed->getCondOp(), - ExtRed->isOrdered(), ExtRed->getDebugLoc()), - ExtOp(ExtRed->getExtOpcode()), ResultTy(ExtRed->getResultType()) { - transferFlags(*ExtRed); - setUnderlyingValue(ExtRed->getUnderlyingValue()); - } - -public: - VPExtendedReductionRecipe(VPReductionRecipe *R, VPWidenCastRecipe *Ext) - : VPReductionRecipe(VPDef::VPExtendedReductionSC, R->getRecurrenceKind(), - {R->getChainOp(), Ext->getOperand(0)}, R->getCondOp(), - R->isOrdered(), Ext->getDebugLoc()), - ExtOp(Ext->getOpcode()), ResultTy(Ext->getResultType()) { - assert((ExtOp == Instruction::CastOps::ZExt || - ExtOp == Instruction::CastOps::SExt) && - "VPExtendedReductionRecipe only supports zext and sext."); - - transferFlags(*Ext); - setUnderlyingValue(R->getUnderlyingValue()); - } - - ~VPExtendedReductionRecipe() override = default; - - VPExtendedReductionRecipe *clone() override { - return new VPExtendedReductionRecipe(this); - } - - VP_CLASSOF_IMPL(VPDef::VPExtendedReductionSC); - - void execute(VPTransformState &State) override { - llvm_unreachable("VPExtendedReductionRecipe should be transform to " - "VPExtendedRecipe + VPReductionRecipe before execution."); - }; - - /// Return the cost of VPExtendedReductionRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - /// The scalar type after extending. - Type *getResultType() const { return ResultTy; } - - /// Is the extend ZExt? - bool isZExt() const { return getExtOpcode() == Instruction::ZExt; } - - /// Get the opcode of the extend for VecOp. 
- Instruction::CastOps getExtOpcode() const { return ExtOp; } -}; - -/// A recipe to represent inloop MulAccumulateReduction operations, multiplying -/// the vector operands (which may be extended), performing a reduction.add on -/// the result, and adding the scalar result to a chain. This recipe is abstract -/// and needs to be lowered to concrete recipes before codegen. The operands are -/// {ChainOp, VecOp1, VecOp2, [Condition]}. -class VPMulAccumulateReductionRecipe : public VPReductionRecipe { - /// Opcode of the extend for VecOp1 and VecOp2. - Instruction::CastOps ExtOp; - - /// Non-neg flag of the extend recipe. - bool IsNonNeg = false; - - /// The scalar type after extending. - Type *ResultTy = nullptr; - - /// For cloning VPMulAccumulateReductionRecipe. - VPMulAccumulateReductionRecipe(VPMulAccumulateReductionRecipe *MulAcc) - : VPReductionRecipe( - VPDef::VPMulAccumulateReductionSC, MulAcc->getRecurrenceKind(), - {MulAcc->getChainOp(), MulAcc->getVecOp0(), MulAcc->getVecOp1()}, - MulAcc->getCondOp(), MulAcc->isOrdered(), - WrapFlagsTy(MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap()), - MulAcc->getDebugLoc()), - ExtOp(MulAcc->getExtOpcode()), IsNonNeg(MulAcc->isNonNeg()), - ResultTy(MulAcc->getResultType()) { - transferFlags(*MulAcc); - setUnderlyingValue(MulAcc->getUnderlyingValue()); - } - -public: - VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul, - VPWidenCastRecipe *Ext0, - VPWidenCastRecipe *Ext1, Type *ResultTy) - : VPReductionRecipe( - VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind(), - {R->getChainOp(), Ext0->getOperand(0), Ext1->getOperand(0)}, - R->getCondOp(), R->isOrdered(), - WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()), - R->getDebugLoc()), - ExtOp(Ext0->getOpcode()), ResultTy(ResultTy) { - assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) == - Instruction::Add && - "The reduction instruction in MulAccumulateteReductionRecipe must " - "be Add"); - assert((ExtOp == 
Instruction::CastOps::ZExt || - ExtOp == Instruction::CastOps::SExt) && - "VPMulAccumulateReductionRecipe only supports zext and sext."); - setUnderlyingValue(R->getUnderlyingValue()); - // Only set the non-negative flag if the original recipe contains. - if (Ext0->hasNonNegFlag()) - IsNonNeg = Ext0->isNonNeg(); - } - - VPMulAccumulateReductionRecipe(VPReductionRecipe *R, VPWidenRecipe *Mul, - Type *ResultTy) - : VPReductionRecipe( - VPDef::VPMulAccumulateReductionSC, R->getRecurrenceKind(), - {R->getChainOp(), Mul->getOperand(0), Mul->getOperand(1)}, - R->getCondOp(), R->isOrdered(), - WrapFlagsTy(Mul->hasNoUnsignedWrap(), Mul->hasNoSignedWrap()), - R->getDebugLoc()), - ExtOp(Instruction::CastOps::CastOpsEnd), ResultTy(ResultTy) { - assert(RecurrenceDescriptor::getOpcode(getRecurrenceKind()) == - Instruction::Add && - "The reduction instruction in MulAccumulateReductionRecipe must be " - "Add"); - setUnderlyingValue(R->getUnderlyingValue()); - } - - ~VPMulAccumulateReductionRecipe() override = default; - - VPMulAccumulateReductionRecipe *clone() override { - return new VPMulAccumulateReductionRecipe(this); - } - - VP_CLASSOF_IMPL(VPDef::VPMulAccumulateReductionSC); - - void execute(VPTransformState &State) override { - llvm_unreachable("VPMulAccumulateReductionRecipe should transform to " - "VPWidenCastRecipe + " - "VPWidenRecipe + VPReductionRecipe before execution"); - } - - /// Return the cost of VPMulAccumulateReductionRecipe. - InstructionCost computeCost(ElementCount VF, - VPCostContext &Ctx) const override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif - - Type *getResultType() const { return ResultTy; } - - /// The first vector value to be extended and reduced. - VPValue *getVecOp0() const { return getOperand(1); } - - /// The second vector value to be extended and reduced. 
- VPValue *getVecOp1() const { return getOperand(2); } - - /// Return true if this recipe contains extended operands. - bool isExtended() const { return ExtOp != Instruction::CastOps::CastOpsEnd; } - - /// Return the opcode of the extends for the operands. - Instruction::CastOps getExtOpcode() const { return ExtOp; } - - /// Return if the operands are zero-extended. - bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; } - - /// Return true if the operand extends have the non-negative flag. - bool isNonNeg() const { return IsNonNeg; } -}; - /// VPReplicateRecipe replicates a given instruction producing multiple scalar /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be @@ -2930,6 +2719,122 @@ class VPBranchOnMaskRecipe : public VPRecipeBase { } }; +/// A recipe to combine multiple recipes into a single 'expression' recipe, +/// which should be considered a single entity for cost-modeling and transforms. +/// The recipe needs to be 'decomposed', i.e. replaced by its individual +/// expression recipes, before execute. The individual expression recipes are +/// completely disconnected from the def-use graph of other recipes not part of +/// the expression. Def-use edges between pairs of expression recipes remain +/// intact, whereas every edge between an expression recipe and a recipe outside +/// the expression is elevated to connect the non-expression recipe with the +/// VPExpressionRecipe itself. +class VPExpressionRecipe : public VPSingleDefRecipe { + /// Recipes included in this VPExpressionRecipe. + SmallVector ExpressionRecipes; + + /// Temporary VPValues used for external operands of the expression, i.e. + /// operands not defined by recipes in the expression. 
+ SmallVector LiveInPlaceholders; + + enum class ExpressionTypes { + /// Represents an inloop extended reduction operation, performing a + /// reduction on an extended vector operand into a scalar value, and adding + /// the result to a chain. + ExtendedReduction, + /// Represents an inloop multiply-accumulate reduction, multiplying the + /// extended vector operands, performing a reduction.add on the result, and + /// adding the scalar result to a chain. + ExtMulAccReduction, + /// Represents an inloop multiply-accumulate reduction, multiplying the + /// vector operands, performing a reduction.add on the result, and adding + /// the scalar result to a chain. + MulAccReduction, + }; + + /// Type of the expression. + ExpressionTypes ExpressionType; + + /// Construct a new VPExpressionRecipe by internalizing recipes in \p + /// ExpressionRecipes. External operands (i.e. not defined by another recipe + /// in the expression) are replaced by temporary VPValues and the original + /// operands are transferred to the VPExpressionRecipe itself. Clone recipes + /// as needed (excluding last) to ensure they are only used by other recipes + /// in the expression. 
+ VPExpressionRecipe(ExpressionTypes ExpressionType, + ArrayRef ExpressionRecipes); + +public: + VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red) + : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {} + VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red) + : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {} + VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, + VPWidenRecipe *Mul, VPReductionRecipe *Red) + : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction, + {Ext0, Ext1, Mul, Red}) {} + + ~VPExpressionRecipe() override { + for (auto *R : reverse(ExpressionRecipes)) + delete R; + for (VPValue *T : LiveInPlaceholders) + delete T; + } + + VP_CLASSOF_IMPL(VPDef::VPExpressionSC) + + VPExpressionRecipe *clone() override { + assert(!ExpressionRecipes.empty() && "empty expressions should be removed"); + SmallVector NewExpressiondRecipes; + for (auto *R : ExpressionRecipes) + NewExpressiondRecipes.push_back(R->clone()); + for (auto *New : NewExpressiondRecipes) { + for (const auto &[Idx, Old] : enumerate(ExpressionRecipes)) + New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]); + // Update placeholder operands in the cloned recipe to use the external + // operands, to be internalized when the cloned expression is constructed. + for (const auto &[Placeholder, OutsideOp] : + zip(LiveInPlaceholders, operands())) + New->replaceUsesOfWith(Placeholder, OutsideOp); + } + return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes); + } + + /// Return the VPValue to use to infer the result type of the recipe. + VPValue *getOperandOfResultType() const { + unsigned OpIdx = + cast(ExpressionRecipes.back())->isConditional() ? 2 + : 1; + return getOperand(getNumOperands() - OpIdx); + } + + /// Insert the recipes of the expression back into the VPlan, directly before + /// the current recipe. Leaves the expression recipe empty, which must be + /// removed before codegen. 
+ void decompose(); + + /// Method for generating code, must not be called as this recipe is abstract. + void execute(VPTransformState &State) override { + llvm_unreachable("recipe must be removed before execute"); + } + + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif + + /// Returns true if this expression contains recipes that may read from or + /// write to memory. + bool mayReadOrWriteMemory() const; + + /// Returns true if this expression contains recipes that may have side + /// effects. + bool mayHaveSideEffects() const; +}; + /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when /// control converges back from a Branch-on-Mask. The phi nodes are needed in /// order to merge values that are set under such a branch and feed their uses. diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index f3b99fe34c069..92db9674ef42b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -297,13 +297,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { // TODO: Use info from interleave group. 
return V->getUnderlyingValue()->getType(); }) - .Case( - [](const auto *R) { return R->getResultType(); }) .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV()->getType(); }) .Case([this](const auto *R) { return inferScalarType(R->getChainOp()); + }) + .Case([this](const auto *R) { + return inferScalarType(R->getOperandOfResultType()); }); assert(ResultTy && "could not infer type for the given VPValue"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1a38932ef99fe..f64bd2a0cb6a2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -49,6 +49,8 @@ using VectorParts = SmallVector; bool VPRecipeBase::mayWriteToMemory() const { switch (getVPDefID()) { + case VPExpressionSC: + return cast(this)->mayReadOrWriteMemory(); case VPInstructionSC: return cast(this)->opcodeMayReadOrWriteFromMemory(); case VPInterleaveSC: @@ -73,8 +75,6 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPBlendSC: case VPReductionEVLSC: case VPReductionSC: - case VPExtendedReductionSC: - case VPMulAccumulateReductionSC: case VPVectorPointerSC: case VPWidenCanonicalIVSC: case VPWidenCastSC: @@ -99,6 +99,8 @@ bool VPRecipeBase::mayWriteToMemory() const { bool VPRecipeBase::mayReadFromMemory() const { switch (getVPDefID()) { + case VPExpressionSC: + return cast(this)->mayReadOrWriteMemory(); case VPInstructionSC: return cast(this)->opcodeMayReadOrWriteFromMemory(); case VPWidenLoadEVLSC: @@ -123,8 +125,6 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPBlendSC: case VPReductionEVLSC: case VPReductionSC: - case VPExtendedReductionSC: - case VPMulAccumulateReductionSC: case VPVectorPointerSC: case VPWidenCanonicalIVSC: case VPWidenCastSC: @@ -147,6 +147,8 @@ bool VPRecipeBase::mayReadFromMemory() const { bool VPRecipeBase::mayHaveSideEffects() const { switch (getVPDefID()) { + case VPExpressionSC: + return cast(this)->mayHaveSideEffects(); case 
VPDerivedIVSC: case VPFirstOrderRecurrencePHISC: case VPPredInstPHISC: @@ -163,8 +165,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPBlendSC: case VPReductionEVLSC: case VPReductionSC: - case VPExtendedReductionSC: - case VPMulAccumulateReductionSC: case VPScalarIVStepsSC: case VPVectorPointerSC: case VPWidenCanonicalIVSC: @@ -2563,30 +2563,182 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, Ctx.CostKind); } -InstructionCost -VPExtendedReductionRecipe::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - unsigned Opcode = RecurrenceDescriptor::getOpcode(getRecurrenceKind()); +VPExpressionRecipe::VPExpressionRecipe( + ExpressionTypes ExpressionType, + ArrayRef ExpressionRecipes) + : VPSingleDefRecipe(VPDef::VPExpressionSC, {}, {}), + ExpressionRecipes(SetVector( + ExpressionRecipes.begin(), ExpressionRecipes.end()) + .takeVector()), + ExpressionType(ExpressionType) { + assert(!ExpressionRecipes.empty() && "Nothing to combine?"); + assert( + none_of(ExpressionRecipes, + [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) && + "expression cannot contain recipes with side-effects"); + + // Maintain a copy of the expression recipes as a set of users. + SmallPtrSet ExpressionRecipesAsSetOfUsers; + for (auto *R : ExpressionRecipes) + ExpressionRecipesAsSetOfUsers.insert(R); + + // Recipes in the expression, except the last one, must only be used by + // (other) recipes inside the expression. If there are other users, external + // to the expression, use a clone of the recipe for external users. + for (VPSingleDefRecipe *R : ExpressionRecipes) { + if (R != ExpressionRecipes.back() && + any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) { + return !ExpressionRecipesAsSetOfUsers.contains(U); + })) { + // There are users outside of the expression. Clone the recipe and use the + // clone for those external users. 
+ VPSingleDefRecipe *CopyForExtUsers = R->clone(); + R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers]( + VPUser &U, unsigned) { + return !ExpressionRecipesAsSetOfUsers.contains(&U); + }); + CopyForExtUsers->insertBefore(R); + } + if (R->getParent()) + R->removeFromParent(); + } + + // Internalize all external operands to the expression recipes. To do so, + // create new temporary VPValues for all operands defined by a recipe outside + // the expression. The original operands are added as operands of the + // VPExpressionRecipe itself. + for (auto *R : ExpressionRecipes) { + for (const auto &[Idx, Op] : enumerate(R->operands())) { + auto *Def = Op->getDefiningRecipe(); + if (Def && ExpressionRecipesAsSetOfUsers.contains(Def)) + continue; + addOperand(Op); + LiveInPlaceholders.push_back(new VPValue()); + R->setOperand(Idx, LiveInPlaceholders.back()); + } + } +} + +void VPExpressionRecipe::decompose() { + for (auto *R : ExpressionRecipes) + R->insertBefore(this); + + for (const auto &[Idx, Op] : enumerate(operands())) + LiveInPlaceholders[Idx]->replaceAllUsesWith(Op); + + replaceAllUsesWith(ExpressionRecipes.back()); + ExpressionRecipes.clear(); +} + +InstructionCost VPExpressionRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { Type *RedTy = Ctx.Types.inferScalarType(this); - auto *SrcVecTy = - cast(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF)); + auto *SrcVecTy = cast( + toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF)); assert(RedTy->isIntegerTy() && - "ExtendedReduction only support integer type currently."); - return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy, - std::nullopt, Ctx.CostKind); + "VPExpressionRecipe only supports integer types currently."); + switch (ExpressionType) { + case ExpressionTypes::ExtendedReduction: { + unsigned Opcode = RecurrenceDescriptor::getOpcode( + cast(ExpressionRecipes[1])->getRecurrenceKind()); + return Ctx.TTI.getExtendedReductionCost( + Opcode, + 
cast(ExpressionRecipes.front())->getOpcode() == + Instruction::ZExt, + RedTy, SrcVecTy, std::nullopt, Ctx.CostKind); + } + case ExpressionTypes::MulAccReduction: + return Ctx.TTI.getMulAccReductionCost(false, RedTy, SrcVecTy, Ctx.CostKind); + case ExpressionTypes::ExtMulAccReduction: + return Ctx.TTI.getMulAccReductionCost( + cast(ExpressionRecipes.front())->getOpcode() == + Instruction::ZExt, + RedTy, SrcVecTy, Ctx.CostKind); + } + llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum"); +} + +bool VPExpressionRecipe::mayReadOrWriteMemory() const { + return any_of(ExpressionRecipes, [](VPSingleDefRecipe *R) { + return R->mayReadFromMemory() || R->mayWriteToMemory(); + }); } -InstructionCost -VPMulAccumulateReductionRecipe::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - Type *RedTy = Ctx.Types.inferScalarType(this); - auto *SrcVecTy = - cast(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF)); - return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy, - Ctx.CostKind); +bool VPExpressionRecipe::mayHaveSideEffects() const { + assert( + none_of(ExpressionRecipes, + [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) && + "expression cannot contain recipes with side-effects"); + return false; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + +void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EXPRESSION "; + printAsOperand(O, SlotTracker); + O << " = "; + auto *Red = cast(ExpressionRecipes.back()); + unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()); + + switch (ExpressionType) { + case ExpressionTypes::ExtendedReduction: { + getOperand(1)->printAsOperand(O, SlotTracker); + O << " +"; + O << " reduce." 
<< Instruction::getOpcodeName(Opcode) << " ("; + getOperand(0)->printAsOperand(O, SlotTracker); + Red->printFlags(O); + + auto *Ext0 = cast(ExpressionRecipes[0]); + O << Instruction::getOpcodeName(Ext0->getOpcode()) << " to " + << *Ext0->getResultType(); + if (Red->isConditional()) { + O << ", "; + Red->getCondOp()->printAsOperand(O, SlotTracker); + } + O << ")"; + break; + } + case ExpressionTypes::MulAccReduction: + case ExpressionTypes::ExtMulAccReduction: { + getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker); + O << " + "; + O << "reduce." + << Instruction::getOpcodeName( + RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind())) + << " ("; + O << "mul"; + bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction; + auto *Mul = cast(IsExtended ? ExpressionRecipes[2] + : ExpressionRecipes[0]); + Mul->printFlags(O); + if (IsExtended) + O << "("; + getOperand(0)->printAsOperand(O, SlotTracker); + if (IsExtended) { + auto *Ext0 = cast(ExpressionRecipes[0]); + O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to " + << *Ext0->getResultType() << "), ("; + } else { + O << ", "; + } + getOperand(1)->printAsOperand(O, SlotTracker); + if (IsExtended) { + auto *Ext1 = cast(ExpressionRecipes[1]); + O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to " + << *Ext1->getResultType() << ")"; + } + if (Red->isConditional()) { + O << ", "; + Red->getCondOp()->printAsOperand(O, SlotTracker); + } + O << ")"; + break; + } + } +} + void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "REDUCE "; @@ -2629,58 +2781,6 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent, O << ")"; } -void VPExtendedReductionRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "EXTENDED-REDUCE "; - printAsOperand(O, SlotTracker); - O << " = "; - getChainOp()->printAsOperand(O, SlotTracker); - O << " +"; - O << " 
reduce." - << Instruction::getOpcodeName( - RecurrenceDescriptor::getOpcode(getRecurrenceKind())) - << " ("; - getVecOp()->printAsOperand(O, SlotTracker); - printFlags(O); - O << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType(); - if (isConditional()) { - O << ", "; - getCondOp()->printAsOperand(O, SlotTracker); - } - O << ")"; -} - -void VPMulAccumulateReductionRecipe::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "MULACC-REDUCE "; - printAsOperand(O, SlotTracker); - O << " = "; - getChainOp()->printAsOperand(O, SlotTracker); - O << " + "; - O << "reduce." - << Instruction::getOpcodeName( - RecurrenceDescriptor::getOpcode(getRecurrenceKind())) - << " ("; - O << "mul"; - printFlags(O); - if (isExtended()) - O << "("; - getVecOp0()->printAsOperand(O, SlotTracker); - if (isExtended()) - O << " " << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType() - << "), ("; - else - O << ", "; - getVecOp1()->printAsOperand(O, SlotTracker); - if (isExtended()) - O << " " << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType() - << ")"; - if (isConditional()) { - O << ", "; - getCondOp()->printAsOperand(O, SlotTracker); - } - O << ")"; -} #endif /// A helper function to scalarize a single Instruction in the innermost loop. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8d4a73c744469..418a2ccbd6b40 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2665,82 +2665,6 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) { R->dissolveToCFGLoop(); } -// Expand VPExtendedReductionRecipe to VPWidenCastRecipe + VPReductionRecipe. -static void expandVPExtendedReduction(VPExtendedReductionRecipe *ExtRed) { - VPWidenCastRecipe *Ext; - // Only ZExt contains non-neg flags. 
- if (ExtRed->isZExt()) - Ext = new VPWidenCastRecipe(ExtRed->getExtOpcode(), ExtRed->getVecOp(), - ExtRed->getResultType(), *ExtRed, - ExtRed->getDebugLoc()); - else - Ext = new VPWidenCastRecipe(ExtRed->getExtOpcode(), ExtRed->getVecOp(), - ExtRed->getResultType(), {}, - ExtRed->getDebugLoc()); - - auto *Red = new VPReductionRecipe( - ExtRed->getRecurrenceKind(), FastMathFlags(), ExtRed->getChainOp(), Ext, - ExtRed->getCondOp(), ExtRed->isOrdered(), ExtRed->getDebugLoc()); - Ext->insertBefore(ExtRed); - Red->insertBefore(ExtRed); - ExtRed->replaceAllUsesWith(Red); - ExtRed->eraseFromParent(); -} - -// Expand VPMulAccumulateReductionRecipe to VPWidenRecipe (mul) + -// VPReductionRecipe (reduce.add) -// + VPWidenCastRecipe (optional). -static void -expandVPMulAccumulateReduction(VPMulAccumulateReductionRecipe *MulAcc) { - // Generate inner VPWidenCastRecipes if necessary. - // Note that we will drop the extend after mul which transforms - // reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)). - VPValue *Op0, *Op1; - if (MulAcc->isExtended()) { - Type *RedTy = MulAcc->getResultType(); - if (MulAcc->isZExt()) - Op0 = new VPWidenCastRecipe( - MulAcc->getExtOpcode(), MulAcc->getVecOp0(), RedTy, - VPIRFlags::NonNegFlagsTy(MulAcc->isNonNeg()), MulAcc->getDebugLoc()); - else - Op0 = new VPWidenCastRecipe(MulAcc->getExtOpcode(), MulAcc->getVecOp0(), - RedTy, {}, MulAcc->getDebugLoc()); - Op0->getDefiningRecipe()->insertBefore(MulAcc); - // Prevent reduce.add(mul(ext(A), ext(A))) generate duplicate - // VPWidenCastRecipe. 
- if (MulAcc->getVecOp0() == MulAcc->getVecOp1()) { - Op1 = Op0; - } else { - if (MulAcc->isZExt()) - Op1 = new VPWidenCastRecipe( - MulAcc->getExtOpcode(), MulAcc->getVecOp1(), RedTy, - VPIRFlags::NonNegFlagsTy(MulAcc->isNonNeg()), - MulAcc->getDebugLoc()); - else - Op1 = new VPWidenCastRecipe(MulAcc->getExtOpcode(), MulAcc->getVecOp1(), - RedTy, {}, MulAcc->getDebugLoc()); - Op1->getDefiningRecipe()->insertBefore(MulAcc); - } - } else { - // No extends in this MulAccRecipe. - Op0 = MulAcc->getVecOp0(); - Op1 = MulAcc->getVecOp1(); - } - - std::array MulOps = {Op0, Op1}; - auto *Mul = new VPWidenRecipe(Instruction::Mul, ArrayRef(MulOps), *MulAcc, - MulAcc->getDebugLoc()); - Mul->insertBefore(MulAcc); - - auto *Red = new VPReductionRecipe( - MulAcc->getRecurrenceKind(), FastMathFlags(), MulAcc->getChainOp(), Mul, - MulAcc->getCondOp(), MulAcc->isOrdered(), MulAcc->getDebugLoc()); - Red->insertBefore(MulAcc); - - MulAcc->replaceAllUsesWith(Red); - MulAcc->eraseFromParent(); -} - void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy) { using namespace llvm::VPlanPatternMatch; @@ -2765,6 +2689,11 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, continue; } + if (auto *Expr = dyn_cast(&R)) { + Expr->decompose(); + ToRemove.push_back(Expr); + } + VPValue *VectorStep; VPValue *ScalarStep; if (!match(&R, m_VPInstruction( @@ -2805,14 +2734,6 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, VPI->replaceAllUsesWith(VectorStep); ToRemove.push_back(VPI); } - for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (auto *ExtRed = dyn_cast(&R)) { - expandVPExtendedReduction(ExtRed); - continue; - } - if (auto *MulAcc = dyn_cast(&R)) - expandVPMulAccumulateReduction(MulAcc); - } } for (VPRecipeBase *R : ToRemove) @@ -2911,10 +2832,10 @@ void VPlanTransforms::handleUncountableEarlyExit( } /// This function tries convert extended in-loop reductions to -/// VPExtendedReductionRecipe and clamp the \p Range if it is beneficial 
and -/// valid. The created recipe must be lowered to concrete +/// VPExpressionRecipe and clamp the \p Range if it is beneficial and +/// valid. The created recipe must be decomposed to its constituent /// recipes before execution. -static VPExtendedReductionRecipe * +static VPExpressionRecipe * tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range) { using namespace VPlanPatternMatch; @@ -2948,19 +2869,20 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, cast(VecOp)->getOpcode() == Instruction::CastOps::ZExt, Ctx.Types.inferScalarType(A))) - return new VPExtendedReductionRecipe(Red, cast(VecOp)); + return new VPExpressionRecipe(cast(VecOp), Red); return nullptr; } /// This function tries convert extended in-loop reductions to -/// VPMulAccumulateReductionRecipe and clamp the \p Range if it is beneficial -/// and valid. The created VPExtendedReductionRecipe must be lower to concrete -/// recipes before execution. Patterns of MulAccumulateReduction: +/// VPExpressionRecipe and clamp the \p Range if it is beneficial +/// and valid. The created VPExpressionRecipe must be decomposed to its +/// constituent recipes before execution. Patterns of the +/// VPExpressionRecipe: /// reduce.add(mul(...)), /// reduce.add(mul(ext(A), ext(B))), /// reduce.add(ext(mul(ext(A), ext(B)))). 
-static VPMulAccumulateReductionRecipe * +static VPExpressionRecipe * tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range) { using namespace VPlanPatternMatch; @@ -3016,12 +2938,12 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, match(RecipeB, m_ZExtOrSExt(m_VPValue())) && IsMulAccValidAndClampRange(RecipeA->getOpcode() == Instruction::CastOps::ZExt, - Mul, RecipeA, RecipeB, nullptr)) - return new VPMulAccumulateReductionRecipe(Red, Mul, RecipeA, RecipeB, - RecipeA->getResultType()); + Mul, RecipeA, RecipeB, nullptr)) { + return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red); + } // Match reduce.add(mul). if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr)) - return new VPMulAccumulateReductionRecipe(Red, Mul, RedTy); + return new VPExpressionRecipe(Mul, Red); } // Match reduce.add(ext(mul(ext(A), ext(B)))). // All extend recipes must have same opcode or A == B @@ -3038,9 +2960,24 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, Ext0->getOpcode() == Ext1->getOpcode() && IsMulAccValidAndClampRange(Ext0->getOpcode() == Instruction::CastOps::ZExt, - Mul, Ext0, Ext1, Ext)) - return new VPMulAccumulateReductionRecipe(Red, Mul, Ext0, Ext1, - Ext->getResultType()); + Mul, Ext0, Ext1, Ext)) { + auto *NewExt0 = new VPWidenCastRecipe( + Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), *Ext0, + Ext0->getDebugLoc()); + NewExt0->insertBefore(Ext0); + + VPWidenCastRecipe *NewExt1 = NewExt0; + if (Ext0 != Ext1) { + NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0), + Ext->getResultType(), *Ext1, + Ext1->getDebugLoc()); + NewExt1->insertBefore(Ext1); + } + Mul->setOperand(0, NewExt0); + Mul->setOperand(1, NewExt1); + Red->setOperand(1, Mul); + return new VPExpressionRecipe(NewExt0, NewExt1, Mul, Red); + } } return nullptr; } @@ -3050,8 +2987,9 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, static void 
tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range) { - VPReductionRecipe *AbstractR = nullptr; - + VPExpressionRecipe *AbstractR = nullptr; + auto IP = std::next(Red->getIterator()); + auto *VPBB = Red->getParent(); if (auto *MulAcc = tryToMatchAndCreateMulAccumulateReduction(Red, Ctx, Range)) AbstractR = MulAcc; else if (auto *ExtRed = tryToMatchAndCreateExtendedReduction(Red, Ctx, Range)) @@ -3060,7 +2998,7 @@ static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, if (!AbstractR) return; - AbstractR->insertBefore(Red); + AbstractR->insertBefore(*VPBB, IP); Red->replaceAllUsesWith(AbstractR); } @@ -3068,7 +3006,7 @@ void VPlanTransforms::convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_deep(Plan.getVectorLoopRegion()))) { - for (VPRecipeBase &R : *VPBB) { + for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (auto *Red = dyn_cast(&R)) tryToCreateAbstractReductionRecipe(Red, Ctx, Range); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index a0d3dc9b934cc..279cdac92d2d1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -49,6 +49,7 @@ class VPValue { friend struct VPDoubleValueDef; friend class VPInterleaveRecipe; friend class VPlan; + friend class VPExpressionRecipe; const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). 
@@ -330,13 +331,12 @@ class VPDef { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, + VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, - VPMulAccumulateReductionSC, - VPExtendedReductionSC, VPPartialReductionSC, VPReplicateSC, VPScalarIVStepsSC, diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index f179a3ae04d23..ddd334d2982f8 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1525,8 +1525,8 @@ define i64 @mla_and_add_together_16_64(ptr nocapture noundef readonly %x, i32 no ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2 -; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i64> -; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <8 x i64> [[TMP1]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = mul nsw <8 x i64> [[TMP4]], [[TMP4]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) ; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP10:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 2e8109c18948e..4af3fa9202c77 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -284,12 +284,12 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, 
vp<[[IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<[[LOAD:%.+]]> = load vp<[[ADDR]]> -; CHECK-NEXT: EXTENDED-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> zext to i64) +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> zext to i64) ; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors @@ -329,7 +329,7 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]> @@ -337,7 +337,7 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i ; CHECK-NEXT: CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]> ; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]> -; CHECK-NEXT: MULACC-REDUCE ir<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>) +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add 
(mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>) ; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors @@ -379,7 +379,7 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> -; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]> @@ -387,7 +387,7 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado ; CHECK-NEXT: CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]> ; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]> -; CHECK-NEXT: MULACC-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD1]]> sext to i64)) +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD1]]> sext to i64)) ; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> ; CHECK-NEXT: No successors