diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 604dc9419025b..fd8784a4c1003 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1480,6 +1480,10 @@ enum NodeType { // Output: Output Chain EXPERIMENTAL_VECTOR_HISTOGRAM, + // Finds the index of the last active mask element + // Operands: Mask + VECTOR_FIND_LAST_ACTIVE, + // llvm.clear_cache intrinsic // Operands: Input Chain, Start Addres, End Address // Outputs: Output Chain diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 3751aac4df8ea..6edc750ea722d 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5368,6 +5368,11 @@ class TargetLowering : public TargetLoweringBase { /// \returns The expansion result or SDValue() if it fails. SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const; + /// Expand VECTOR_FIND_LAST_ACTIVE nodes + /// \param N Node to expand + /// \returns The expansion result or SDValue() if it fails. + SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const; + /// Expand ABS nodes. Expands vector/scalar ABS nodes, /// vector nodes can only succeed if all operations are legal/custom. /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index be7521f341685..c519603fae9a2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -155,6 +155,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND_VECTOR_INREG: Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break; + case ISD::VECTOR_FIND_LAST_ACTIVE: + Res = PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(N); + break; + case ISD::SIGN_EXTEND: case ISD::VP_SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -2069,6 +2073,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: Res = PromoteIntOp_VECTOR_HISTOGRAM(N, OpNo); break; + case ISD::VECTOR_FIND_LAST_ACTIVE: + Res = PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. 
@@ -2810,6 +2817,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 1> NewOps(N->ops()); + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -6124,6 +6138,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, SDLoc(N), NVT, N->ops()); +} + SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 571a710cc92a3..069e191d10d7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -378,6 +378,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_VPFunnelShift(SDNode *N); SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); SDValue PromoteIntRes_PATCHPOINT(SDNode *N); + SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -428,6 +429,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N, unsigned OpNo); void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index e8404a13009a7..607c70675c988 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -503,6 +503,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: + case ISD::VECTOR_FIND_LAST_ACTIVE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; @@ -1208,6 +1209,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::VECTOR_COMPRESS: Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG)); return; + case ISD::VECTOR_FIND_LAST_ACTIVE: + Results.push_back(TLI.expandVectorFindLastActive(Node, DAG)); + return; case ISD::SCMP: case ISD::UCMP: Results.push_back(TLI.expandCMP(Node, DAG)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f8d7c3ef7bbe7..abcc75c5a26ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6427,42 +6427,25 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active && "Tried lowering invalid vector extract last"); SDLoc sdl = getCurSDLoc(); + const DataLayout &Layout = DAG.getDataLayout(); SDValue Data = getValue(I.getOperand(0)); SDValue Mask = getValue(I.getOperand(1)); - SDValue PassThru = getValue(I.getOperand(2)); - EVT DataVT = Data.getValueType(); - EVT ScalarVT = PassThru.getValueType(); - EVT BoolVT = Mask.getValueType().getScalarType(); - - // Find a suitable type for a stepvector. - ConstantRange VScaleRange(1, /*isFullSet=*/true); // Dummy value. - if (DataVT.isScalableVector()) - VScaleRange = getVScaleRange(I.getCaller(), 64); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned EltWidth = TLI.getBitWidthForCttzElements( - I.getType(), DataVT.getVectorElementCount(), /*ZeroIsPoison=*/true, - &VScaleRange); - MVT StepVT = MVT::getIntegerVT(EltWidth); - EVT StepVecVT = DataVT.changeVectorElementType(StepVT); - - // Zero out lanes with inactive elements, then find the highest remaining - // value from the stepvector. - SDValue Zeroes = DAG.getConstant(0, sdl, StepVecVT); - SDValue StepVec = DAG.getStepVector(sdl, StepVecVT); - SDValue ActiveElts = DAG.getSelect(sdl, StepVecVT, Mask, StepVec, Zeroes); - SDValue HighestIdx = - DAG.getNode(ISD::VECREDUCE_UMAX, sdl, StepVT, ActiveElts); - - // Extract the corresponding lane from the data vector - EVT ExtVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue Idx = DAG.getZExtOrTrunc(HighestIdx, sdl, ExtVT); - SDValue Extract = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, ScalarVT, Data, Idx); - - // If all mask lanes were inactive, choose the passthru value instead. - SDValue AnyActive = DAG.getNode(ISD::VECREDUCE_OR, sdl, BoolVT, Mask); - SDValue Result = DAG.getSelect(sdl, ScalarVT, AnyActive, Extract, PassThru); + EVT ResVT = TLI.getValueType(Layout, I.getType()); + + EVT ExtVT = TLI.getVectorIdxTy(Layout); + SDValue Idx = DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, sdl, ExtVT, Mask); + SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, ResVT, Data, Idx); + + Value *Default = I.getOperand(2); + if (!isa<UndefValue>(Default) && !isa<PoisonValue>(Default)) { + SDValue PassThru = getValue(Default); + EVT BoolVT = Mask.getValueType().getScalarType(); + SDValue AnyActive = DAG.getNode(ISD::VECREDUCE_OR, sdl, BoolVT, Mask); + Result = DAG.getSelect(sdl, ResVT, AnyActive, Result, PassThru); + } + setValue(&I, Result); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 580ff19065557..f63c8dd3df1c8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -567,6 +567,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: return "histogram"; + case ISD::VECTOR_FIND_LAST_ACTIVE: + return "find_last_active"; + // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) 
\ case ISD::SDID: \ diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9f57884eae04d..90ac79cfb0e3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/CodeGenCommonISel.h" @@ -9453,6 +9454,43 @@ SDValue TargetLowering::expandVPCTTZElements(SDNode *N, return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL); } +SDValue TargetLowering::expandVectorFindLastActive(SDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + SDValue Mask = N->getOperand(0); + EVT MaskVT = Mask.getValueType(); + EVT BoolVT = MaskVT.getScalarType(); + + // Find a suitable type for a stepvector. + ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default. + if (MaskVT.isScalableVector()) + VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned EltWidth = TLI.getBitWidthForCttzElements( + BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(), + /*ZeroIsPoison=*/true, &VScaleRange); + EVT StepVT = MVT::getIntegerVT(EltWidth); + EVT StepVecVT = MaskVT.changeVectorElementType(StepVT); + + // If promotion is required to make the type legal, do it here; promotion + // of integers within LegalizeVectorOps is looking for types of the same + // size but with a smaller number of larger elements, not the usual larger + // size with the same number of larger elements. + if (TLI.getTypeAction(StepVecVT.getSimpleVT()) == + TargetLowering::TypePromoteInteger) { + StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT); + StepVT = StepVecVT.getVectorElementType(); + } + + // Zero out lanes with inactive elements, then find the highest remaining + // value from the stepvector. + SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT); + SDValue StepVec = DAG.getStepVector(DL, StepVecVT); + SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes); + SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts); + return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0)); +} + SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 3b0e9c7526fd0..73af0a9a71407 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -818,6 +818,9 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SDOPC, VT, Expand); #include "llvm/IR/VPIntrinsics.def" + // Masked vector extracts default to expand. + setOperationAction(ISD::VECTOR_FIND_LAST_ACTIVE, VT, Expand); + // FP environment operations default to expand. 
setOperationAction(ISD::GET_FPENV, VT, Expand); setOperationAction(ISD::SET_FPENV, VT, Expand); diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll index 5212acc6fca0f..3b11e67d072e7 100644 --- a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll @@ -318,7 +318,7 @@ define i16 @extract_last_i16_scalable( %data, %data, %data, %data, %data, %data, %data, %mask) #0 { +; CHECK-LABEL: extract_last_i8_scalable_poison_passthru: +; CHECK: // %bb.0: +; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: mov z2.b, #0 // =0x0 +; CHECK-NEXT: sel z1.b, p0, z1.b, z2.b +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: umaxv b1, p0, z1.b +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: and x8, x8, #0xff +; CHECK-NEXT: whilels p0.b, xzr, x8 +; CHECK-NEXT: lastb w0, p0, z0.b +; CHECK-NEXT: ret + %res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8( %data, %mask, i8 poison) + ret i8 %res +} + declare i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8>, <16 x i1>, i8) declare i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16>, <8 x i1>, i16) declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32>, <4 x i1>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll b/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll index 1eef183db21bb..10929394af75f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll @@ -189,79 +189,43 @@ define i8 @extract_last_i8_scalable( %data, } define i16 @extract_last_i16_scalable( %data, %mask, i16 %passthru) { -; RV32-LABEL: extract_last_i16_scalable: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vcpop.m a1, v0 -; RV32-NEXT: vid.v v10, v0.t -; RV32-NEXT: beqz a1, .LBB7_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v10, v10, v10 -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: slli a0, a0, 16 -; RV32-NEXT: srli a0, a0, 16 -; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vslidedown.vx v8, v8, a0 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: .LBB7_2: -; RV32-NEXT: ret -; -; RV64-LABEL: extract_last_i16_scalable: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, mu -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vcpop.m a1, v0 -; RV64-NEXT: vid.v v10, v0.t -; RV64-NEXT: beqz a1, .LBB7_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vredmaxu.vs v10, v10, v10 -; RV64-NEXT: vmv.x.s a0, v10 -; RV64-NEXT: slli a0, a0, 48 -; RV64-NEXT: srli a0, a0, 48 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vslidedown.vx v8, v8, a0 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: .LBB7_2: -; RV64-NEXT: ret +; CHECK-LABEL: extract_last_i16_scalable: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vcpop.m a1, v0 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: beqz a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vredmaxu.vs v10, v10, v10 +; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: ret %res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16( %data, %mask, i16 %passthru) ret i16 %res } define i32 @extract_last_i32_scalable( %data, %mask, i32 %passthru) { -; RV32-LABEL: 
extract_last_i32_scalable: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vcpop.m a1, v0 -; RV32-NEXT: vid.v v10, v0.t -; RV32-NEXT: beqz a1, .LBB8_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v10, v10, v10 -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vx v8, v8, a0 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: .LBB8_2: -; RV32-NEXT: ret -; -; RV64-LABEL: extract_last_i32_scalable: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, mu -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vcpop.m a1, v0 -; RV64-NEXT: vid.v v10, v0.t -; RV64-NEXT: beqz a1, .LBB8_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vredmaxu.vs v10, v10, v10 -; RV64-NEXT: vmv.x.s a0, v10 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vx v8, v8, a0 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: .LBB8_2: -; RV64-NEXT: ret +; CHECK-LABEL: extract_last_i32_scalable: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vcpop.m a1, v0 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: beqz a1, .LBB8_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vredmaxu.vs v10, v10, v10 +; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: ret %res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32( %data, %mask, i32 %passthru) ret i32 %res } @@ -269,7 +233,7 @@ define i32 @extract_last_i32_scalable( %data, %data, %mask, i64 %passthru) { ; RV32-LABEL: extract_last_i64_scalable: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, mu +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu ; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: vcpop.m a2, v0 ; RV32-NEXT: vid.v v10, v0.t @@ -278,6 +242,7 @@ define i64 @extract_last_i64_scalable( %data, %data, %data, %data, %data, %mask, float %passthru) { -; RV32-LABEL: extract_last_float_scalable: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vcpop.m a0, v0 -; RV32-NEXT: vid.v v10, v0.t -; RV32-NEXT: beqz a0, .LBB10_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v10, v10, v10 -; RV32-NEXT: vmv.x.s a0, v10 -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vx v8, v8, a0 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: .LBB10_2: -; RV32-NEXT: ret -; -; RV64-LABEL: extract_last_float_scalable: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vcpop.m a0, v0 -; RV64-NEXT: vid.v v10, v0.t -; RV64-NEXT: beqz a0, .LBB10_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: vredmaxu.vs v10, v10, v10 -; RV64-NEXT: vmv.x.s a0, v10 -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a0, a0, 32 -; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vx v8, v8, a0 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: .LBB10_2: -; RV64-NEXT: ret +; CHECK-LABEL: extract_last_float_scalable: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vcpop.m a0, v0 +; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: beqz a0, .LBB10_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vredmaxu.vs v10, v10, v10 +; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; 
CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: ret %res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) ret float %res } @@ -347,7 +296,7 @@ define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) { ; CHECK-LABEL: extract_last_double_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: vid.v v10, v0.t @@ -355,6 +304,7 @@ define double @extract_last_double_scalable(<vscale x 2 x double> %data,
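
The new VECTOR_FIND_LAST_ACTIVE node returns the index of the last set mask lane, and the generic expansion in expandVectorFindLastActive() builds it as a select of a step vector against zero followed by an unsigned-max reduction. A minimal scalar C++ model of both formulations; the function names and the use of std::vector<bool> are illustrative only, not LLVM types:

// A minimal scalar model of VECTOR_FIND_LAST_ACTIVE and of the generic
// expansion: zero out the inactive lanes of a step vector (0, 1, 2, ...) and
// take the unsigned maximum.
#include <cstddef>
#include <cstdint>
#include <vector>

// Direct definition: index of the last active (true) mask lane; 0 when no
// lane is active.
uint64_t findLastActiveReference(const std::vector<bool> &Mask) {
  uint64_t Last = 0;
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I])
      Last = I;
  return Last;
}

// Expansion-style formulation: select(Mask, StepVec, Zeroes) followed by
// VECREDUCE_UMAX. Agrees with the reference whenever at least one lane is
// active; with an all-false mask both return 0.
uint64_t findLastActiveExpanded(const std::vector<bool> &Mask) {
  uint64_t Max = 0;
  for (size_t I = 0; I < Mask.size(); ++I) {
    uint64_t Lane = Mask[I] ? I : 0; // select of step vector against zeroes
    Max = Lane > Max ? Lane : Max;   // unsigned-max reduction
  }
  return Max;
}

The step elements in the real expansion are sized via getBitWidthForCttzElements() so that every lane index fits, which is why the RISC-V checks above now use e8 step vectors and mask the index with andi rather than a shift pair.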
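
The updated visitVectorExtractLastActive() only emits the passthru select when operand 2 is neither poison nor undef; the new AArch64 poison-passthru test relies on that. A scalar sketch of the resulting semantics, again with illustrative names (std::optional stands in for a poison/undef default):

// Scalar sketch of the extract.last.active lowering: take
// Data[find_last_active(Mask)], and only fall back to the passthru value when
// it is a real value and no mask lane was active. Assumes Data and Mask have
// the same, non-zero length.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

int32_t extractLastActive(const std::vector<int32_t> &Data,
                          const std::vector<bool> &Mask,
                          std::optional<int32_t> PassThru) {
  uint64_t Idx = 0;       // VECTOR_FIND_LAST_ACTIVE
  bool AnyActive = false; // VECREDUCE_OR of the mask
  for (size_t I = 0; I < Mask.size(); ++I) {
    if (Mask[I]) {
      Idx = I;
      AnyActive = true;
    }
  }

  int32_t Extract = Data[Idx]; // EXTRACT_VECTOR_ELT

  // A poison/undef passthru (modelled here as std::nullopt) lets the lowering
  // skip the select entirely.
  if (!PassThru)
    return Extract;
  return AnyActive ? Extract : *PassThru;
}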
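
The step-element width matters because the mask may be scalable: the expansion queries the function's vscale_range and getBitWidthForCttzElements() to pick an integer narrow enough to be cheap yet wide enough to hold any lane index, promoting it again if the resulting vector type is not legal. A rough model of that width choice; the helper below is an illustrative stand-in, not the LLVM API:

// Illustrative stand-in for the step-width choice: the narrowest power-of-two
// integer width (at least 8 bits) that can represent every possible lane
// index, given the known-minimum lane count and, for scalable vectors, an
// upper bound on vscale.
#include <cstdint>

unsigned stepElementBitWidth(uint64_t KnownMinLanes, bool Scalable,
                             uint64_t MaxVScale) {
  uint64_t MaxLanes = Scalable ? KnownMinLanes * MaxVScale : KnownMinLanes;
  unsigned Bits = 1;
  while (Bits < 64 && (uint64_t(1) << Bits) < MaxLanes) // need 2^Bits >= MaxLanes
    ++Bits;
  unsigned Width = 8;
  while (Width < Bits)
    Width *= 2;
  return Width; // e.g. 8 for a mask of 8 lanes with vscale bounded by 16
}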