diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index d9f3f4ab3935d..4183ca112033f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -811,6 +811,18 @@ class GExtractSubvector : public GenericMachineInstr {
   }
 };
 
+/// Represents an insert subvector.
+class GInsertSubvector : public GenericMachineInstr {
+public:
+  Register getBigVec() const { return getOperand(1).getReg(); }
+  Register getSubVec() const { return getOperand(2).getReg(); }
+  uint64_t getIndexImm() const { return getOperand(3).getImm(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR;
+  }
+};
+
 /// Represents a freeze.
 class GFreeze : public GenericMachineInstr {
 public:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index ec8a299388376..f682b20816d57 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -380,6 +380,8 @@ class LegalizerHelper {
                                          LLT CastTy);
   LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                          LLT CastTy);
+  LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT CastTy);
 
   LegalizeResult lowerConstant(MachineInstr &MI);
   LegalizeResult lowerFConstant(MachineInstr &MI);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2fd95076c46..98aece0d68d6e 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3276,6 +3276,33 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_INSERT_SUBVECTOR: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+    Register BigVec = IS.getBigVec();
+    Register SubVec = IS.getSubVec();
+
+    LLT SubVecTy = MRI.getType(SubVec);
+    LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
+
+    // Widen the G_INSERT_SUBVECTOR
+    auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
+    auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
+    auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
+                                                      IS.getIndexImm());
+
+    // Truncate back down
+    auto SplatZero = MIRBuilder.buildSplatVector(
+        WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
+    MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
+                         SplatZero);
+
+    MI.eraseFromParent();
+
+    return Legalized;
+  }
   }
 }
 
@@ -3725,6 +3752,77 @@ LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
   return Legalized;
 }
 
+/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
+///
+///  <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
+///                                          <vscale x 8 x i1>,
+///                                          N
+///
+/// ===>
+///
+///  <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
+///  <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
+///  <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
+///                                         <vscale x 1 x i8>, N / 8
+///  <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
+                                        LLT CastTy) {
+  auto ES = cast<GInsertSubvector>(&MI);
+
+  if (!CastTy.isVector())
+    return UnableToLegalize;
+
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  Register Dst = ES->getReg(0);
+  Register BigVec = ES->getBigVec();
+  Register SubVec = ES->getSubVec();
+  uint64_t Idx = ES->getIndexImm();
+
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+  LLT DstTy = MRI.getType(Dst);
+  LLT BigVecTy = MRI.getType(BigVec);
+  LLT SubVecTy = MRI.getType(SubVec);
+
+  if (DstTy == CastTy)
+    return Legalized;
+
+  if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
+    return UnableToLegalize;
+
+  ElementCount DstTyEC = DstTy.getElementCount();
+  ElementCount BigVecTyEC = BigVecTy.getElementCount();
+  ElementCount SubVecTyEC = SubVecTy.getElementCount();
+  auto DstTyMinElts = DstTyEC.getKnownMinValue();
+  auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
+  auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
+
+  unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
+  unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
+  if (CastEltSize < DstEltSize)
+    return UnableToLegalize;
+
+  auto AdjustAmt = CastEltSize / DstEltSize;
+  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
+      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
+    return UnableToLegalize;
+
+  Idx /= AdjustAmt;
+  BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
+  SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
+  auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
+  auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
+  auto PromotedIS =
+      MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
+  MIRBuilder.buildBitcast(Dst, PromotedIS);
+
+  ES->eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
   Register DstReg = LoadMI.getDstReg();
@@ -4033,6 +4131,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
     return bitcastConcatVector(MI, TypeIdx, CastTy);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
     return bitcastExtractSubvector(MI, TypeIdx, CastTy);
+  case TargetOpcode::G_INSERT_SUBVECTOR:
+    return bitcastInsertSubvector(MI, TypeIdx, CastTy);
   default:
     return UnableToLegalize;
   }
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index d1449f751b40a..2d34f31f7e276 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -615,6 +615,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
           all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
               typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))));
 
+  getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
+      .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
+                    typeIsLegalBoolVec(1, BoolVecTys, ST)))
+      .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+                    typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+
   getLegacyLegalizerInfo().computeTables();
 }
 
@@ -834,9 +840,7 @@ static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
 
 /// Gets the two common "VL" operands: an all-ones mask and the vector length.
 /// VecTy is a scalable vector type.
 static std::pair<MachineInstrBuilder, MachineInstrBuilder>
-buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
-                  MachineRegisterInfo &MRI) {
-  LLT VecTy = Dst.getLLTTy(MRI);
+buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
   assert(VecTy.isScalableVector() && "Expecting scalable container type");
   const RISCVSubtarget &STI = MIB.getMF().getSubtarget<RISCVSubtarget>();
   LLT XLenTy(STI.getXLenVT());
@@ -890,7 +894,7 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
   // Handle case of s64 element vectors on rv32
   if (XLenTy.getSizeInBits() == 32 &&
       VecTy.getElementType().getSizeInBits() == 64) {
-    auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI);
+    auto [_, VL] = buildDefaultVLOps(MRI.getType(Dst), MIB, MRI);
     buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
                              MRI);
     MI.eraseFromParent();
@@ -1025,6 +1029,134 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   return true;
 }
 
+bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
+                                                 LegalizerHelper &Helper,
+                                                 MachineIRBuilder &MIB) const {
+  GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+
+  Register Dst = IS.getReg(0);
+  Register BigVec = IS.getBigVec();
+  Register LitVec = IS.getSubVec();
+  uint64_t Idx = IS.getIndexImm();
+
+  LLT BigTy = MRI.getType(BigVec);
+  LLT LitTy = MRI.getType(LitVec);
+
+  if (Idx == 0 ||
+      MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+    return true;
+
+  // We don't have the ability to slide mask vectors up indexed by their i1
+  // elements; the smallest we can do is i8. Often we are able to bitcast to
+  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
+  // vectors and truncate down after the insert.
+  if (LitTy.getElementType() == LLT::scalar(1)) {
+    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+    if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
+      return Helper.bitcast(
+          IS, 0,
+          LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
+
+    // We can't slide this mask vector up indexed by its i1 elements.
+    // This poses a problem when we wish to insert a scalable vector which
+    // can't be re-expressed as a larger type. Just choose the slow path and
+    // extend to a larger type, then truncate back down.
+    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+    return Helper.widenScalar(IS, 0, ExtBigTy);
+  }
+
+  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+  unsigned SubRegIdx, RemIdx;
+  std::tie(SubRegIdx, RemIdx) =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
+
+  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
+  assert(isPowerOf2_64(
+      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
+  bool ExactlyVecRegSized =
+      STI.expandVScale(LitTy.getSizeInBits())
+          .isKnownMultipleOf(STI.expandVScale(VecRegSize));
+
+  // If the Idx has been completely eliminated and this subvector's size is a
+  // vector register or a multiple thereof, or the surrounding elements are
+  // undef, then this is a subvector insert which naturally aligns to a vector
+  // register. These can easily be handled using subregister manipulation.
+  if (RemIdx == 0 && ExactlyVecRegSized)
+    return true;
+
+  // If the subvector is smaller than a vector register, then the insertion
+  // must preserve the undisturbed elements of the register. We do this by
+  // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+  // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+  // subvector within the vector register, and an INSERT_SUBVECTOR of that
+  // LMUL=1 type back into the larger vector (resolving to another subregister
+  // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
+  // to avoid allocating a large register group to hold our subvector.
+
+  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+// Pseudo equivalent to a RISCVISD::VMV_V_V_VL
+def G_VMV_V_V_VL : RISCVGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$passthru, type0:$vec, type2:$vl);
+  let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VMV_V_V_VL, riscv_vmv_v_v_vl>;
+
+// Pseudo equivalent to a RISCVISD::VSLIDEUP_VL
+def G_VSLIDEUP_VL : RISCVGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$merge, type0:$vec, type1:$idx, type2:$mask,
+                       type3:$vl, type4:$policy);
+  let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VSLIDEUP_VL, riscv_slideup_vl>;
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
new file mode 100644
index 0000000000000..68c5ae1204749
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
@@ -0,0 +1,610 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,RV32
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,RV64
+
+
+# BigVec=G_IMPLICIT_DEF when index is non-zero
+---
+name: insert_subvector_nxv2i1_nxv4i1_undef_nonzero
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insert_subvector_nxv2i1_nxv4i1_undef_nonzero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s1>), 2
+    ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s1>), %1, 2
+    $v8 = COPY %2(<vscale x 4 x s1>)
+    PseudoRET implicit $v8
+...
+
+# BigVec=G_IMPLICIT_DEF when index is zero
+---
+name: insert_subvector_nxv2i1_nxv4i1_undef_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: insert_subvector_nxv2i1_nxv4i1_undef_zero
+    ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s1>), 0
+    ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+    %2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s1>), %1, 0
+    $v8 = COPY %2(<vscale x 4 x s1>)
+    PseudoRET implicit $v8
+...
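+
+# A rough sketch of the i1 widening path exercised by the cases below (the
+# types here are illustrative, not autogenerated output): both masks are
+# zero-extended to i8 vectors, the insert is done on the wide types, and an
+# ICMP against a zero splat narrows the result back to i1, roughly:
+#   %bz:_(<vscale x 4 x s8>) = G_ZEXT %big(<vscale x 4 x s1>)
+#   %sz:_(<vscale x 2 x s8>) = G_ZEXT %sub(<vscale x 2 x s1>)
+#   %wide:_(<vscale x 4 x s8>) = G_INSERT_SUBVECTOR %bz, %sz(<vscale x 2 x s8>), 2
+#   %res:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), %wide(<vscale x 4 x s8>), %zeroes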
+ + +# Special handling for i1-element vectors with non-zero index +--- +name: insert_subvector_nxv2i1_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: insert_subvector_nxv2i1_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[SELECT1]](), 0 + ; RV32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C4]](s64) + ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C5]](s64) + ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C5]](s64) + ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]] + ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[SELECT]], [[INSERT_SUBVECTOR]], [[LSHR1]](s64), [[VMSET_VL]](), [[ADD]](s64), 1 + ; RV32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT4]](s64) + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[VSLIDEUP_VL]](), [[SPLAT_VECTOR4]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: insert_subvector_nxv2i1_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: 
[[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[SELECT1]](), 0 + ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C4]](s32) + ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C5]](s32) + ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C5]](s32) + ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]] + ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[SELECT]], [[INSERT_SUBVECTOR]], [[LSHR1]](s32), [[VMSET_VL]](), [[ADD]](s32), 1 + ; RV64-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C6]](s32) + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[VSLIDEUP_VL]](), [[SPLAT_VECTOR4]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 2 + $v8 = COPY %2() + PseudoRET implicit $v8 +... +--- +name: insert_subvector_nxv4i1_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: insert_subvector_nxv4i1_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[SELECT1]](), 0 + ; RV32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C4]](s64) + ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C5]](s64) + ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C6]](s64) + ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]] + ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[SELECT]], [[INSERT_SUBVECTOR]], [[LSHR1]](s64), [[VMSET_VL]](), [[ADD]](s64), 0 + ; RV32-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C7]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT4]](s64) + 
; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[VSLIDEUP_VL]](), [[SPLAT_VECTOR4]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: insert_subvector_nxv4i1_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[SELECT1]](), 0 + ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C4]](s32) + ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C5]](s32) + ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C6]](s32) + ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]] + ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[SELECT]], [[INSERT_SUBVECTOR]], [[LSHR1]](s32), [[VMSET_VL]](), [[ADD]](s32), 0 + ; RV64-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C7]](s32) + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[VSLIDEUP_VL]](), [[SPLAT_VECTOR4]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 4 + $v8 = COPY %2() + PseudoRET implicit $v8 +... 
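+
+# A rough sketch of the bitcast path taken by the next case (illustrative
+# types, not autogenerated output): once both masks have at least 8 i1
+# elements, they are reinterpreted as i8 vectors and the index is divided by 8
+# (32 -> 4), so no zero-extension is needed:
+#   %b:_(<vscale x 8 x s8>) = G_BITCAST %big(<vscale x 64 x s1>)
+#   %s:_(<vscale x 4 x s8>) = G_BITCAST %sub(<vscale x 32 x s1>)
+#   %i:_(<vscale x 8 x s8>) = G_INSERT_SUBVECTOR %b, %s(<vscale x 4 x s8>), 4
+#   %r:_(<vscale x 64 x s1>) = G_BITCAST %i(<vscale x 8 x s8>)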
+--- +name: insert_subvector_nxv32i1_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: insert_subvector_nxv32i1_nxv64i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[COPY]]() + ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_() = G_BITCAST [[DEF]]() + ; RV32-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[BITCAST1]](), 0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s64) + ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C1]](s64) + ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64) + ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]] + ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[BITCAST]], [[INSERT_SUBVECTOR]], [[LSHR1]](s64), [[VMSET_VL]](), [[ADD]](s64), 1 + ; RV32-NEXT: [[BITCAST2:%[0-9]+]]:_() = G_BITCAST [[VSLIDEUP_VL]]() + ; RV32-NEXT: $v8 = COPY [[BITCAST2]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: insert_subvector_nxv32i1_nxv64i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_() = G_BITCAST [[COPY]]() + ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_() = G_BITCAST [[DEF]]() + ; RV64-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[BITCAST1]](), 0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s32) + ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C1]](s32) + ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32) + ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]] + ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[BITCAST]], [[INSERT_SUBVECTOR]], [[LSHR1]](s32), [[VMSET_VL]](), [[ADD]](s32), 1 + ; RV64-NEXT: [[BITCAST2:%[0-9]+]]:_() = G_BITCAST [[VSLIDEUP_VL]]() + ; RV64-NEXT: $v8 = COPY [[BITCAST2]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 32 + $v8 = COPY %2() + PseudoRET implicit $v8 +... + +# i1-element vectors with zero index +--- +name: insert_subvector_nxv2i1_nxv4i1_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; CHECK-LABEL: name: insert_subvector_nxv2i1_nxv4i1_zero + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... 
+--- +name: insert_subvector_nxv4i1_nxv8i1_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; CHECK-LABEL: name: insert_subvector_nxv4i1_nxv8i1_zero + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... +--- +name: insert_subvector_nxv32i1_nxv64i1_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; CHECK-LABEL: name: insert_subvector_nxv32i1_nxv64i1_zero + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... + +# Insert with zero index +--- +name: insert_subvector_nxv1i8_nxv2i8_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; CHECK-LABEL: name: insert_subvector_nxv1i8_nxv2i8_zero + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... +--- +name: insert_subvector_nxv2i16_nxv4i16_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; CHECK-LABEL: name: insert_subvector_nxv2i16_nxv4i16_zero + ; CHECK: liveins: $v8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... +--- +name: insert_subvector_nxv4i32_nxv8i32_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8m4 + ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32_zero + ; CHECK: liveins: $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8m4 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... 
+--- +name: insert_subvector_nxv2i64_nxv8i64_zero +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8m8 + ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64_zero + ; CHECK: liveins: $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 0 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8m8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 0 + $v8 = COPY %2() + PseudoRET implicit $v8 +... + +# Insert with non-zero index +--- +name: insert_subvector_nxv1i8_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: insert_subvector_nxv1i8_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[DEF]](), 0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s64) + ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C1]](s64) + ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64) + ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]] + ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[COPY]], [[INSERT_SUBVECTOR]], [[LSHR1]](s64), [[VMSET_VL]](), [[ADD]](s64), 1 + ; RV32-NEXT: $v8 = COPY [[VSLIDEUP_VL]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: insert_subvector_nxv1i8_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[DEF]](), 0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s32) + ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C1]](s32) + ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32) + ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]] + ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[COPY]], [[INSERT_SUBVECTOR]], [[LSHR1]](s32), [[VMSET_VL]](), [[ADD]](s32), 1 + ; RV64-NEXT: $v8 = COPY [[VSLIDEUP_VL]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 1 + $v8 = COPY %2() + PseudoRET implicit $v8 +... 
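+
+# A rough reading of the G_VSLIDEUP_VL emitted above (illustrative, assuming
+# values derived from vlenb at runtime): for inserting <vscale x 1 x s8> at
+# index 1, the slide offset is the index scaled to runtime elements and the VL
+# covers the slid-in range:
+#   offset = vlenb >> 3            # vscale x 1 elements precede the subvector
+#   vl     = offset + (vlenb >> 3) # elements [offset, vl) receive the subvector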
+--- +name: insert_subvector_nxv2i16_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: insert_subvector_nxv2i16_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[DEF]](), 0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s64) + ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C1]](s64) + ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64) + ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]] + ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[COPY]], [[INSERT_SUBVECTOR]], [[LSHR1]](s64), [[VMSET_VL]](), [[ADD]](s64), 0 + ; RV32-NEXT: $v8 = COPY [[VSLIDEUP_VL]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: insert_subvector_nxv2i16_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[DEF]](), 0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s32) + ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C1]](s32) + ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32) + ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]] + ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_() = G_VSLIDEUP_VL [[COPY]], [[INSERT_SUBVECTOR]], [[LSHR1]](s32), [[VMSET_VL]](), [[ADD]](s32), 0 + ; RV64-NEXT: $v8 = COPY [[VSLIDEUP_VL]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 1 + $v8 = COPY %2() + PseudoRET implicit $v8 +... 
+--- +name: insert_subvector_nxv8i16_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8m8 + ; RV32-LABEL: name: insert_subvector_nxv8i16_nxv1i16 + ; RV32: liveins: $v8m8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_() = G_EXTRACT_SUBVECTOR [[COPY]](), 4 + ; RV32-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[DEF]](), 0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s64) + ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C1]](s64) + ; RV32-NEXT: [[VMV_V_V_VL:%[0-9]+]]:_() = G_VMV_V_V_VL [[EXTRACT_SUBVECTOR]], [[INSERT_SUBVECTOR]](), [[LSHR]](s64) + ; RV32-NEXT: [[INSERT_SUBVECTOR1:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[VMV_V_V_VL]](), 4 + ; RV32-NEXT: $v8 = COPY [[INSERT_SUBVECTOR1]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: insert_subvector_nxv8i16_nxv1i16 + ; RV64: liveins: $v8m8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_() = G_EXTRACT_SUBVECTOR [[COPY]](), 4 + ; RV64-NEXT: [[DEF1:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[DEF1]], [[DEF]](), 0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL [[C]](s32) + ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C1]](s32) + ; RV64-NEXT: [[VMV_V_V_VL:%[0-9]+]]:_() = G_VMV_V_V_VL [[EXTRACT_SUBVECTOR]], [[INSERT_SUBVECTOR]](), [[LSHR]](s32) + ; RV64-NEXT: [[INSERT_SUBVECTOR1:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[VMV_V_V_VL]](), 4 + ; RV64-NEXT: $v8 = COPY [[INSERT_SUBVECTOR1]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 4 + $v8 = COPY %2() + PseudoRET implicit $v8 +... +--- +name: insert_subvector_nxv4i32_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8m4 + ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32 + ; CHECK: liveins: $v8m4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 4 + ; CHECK-NEXT: $v8m4 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m4 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 4 + $v8m4 = COPY %2() + PseudoRET implicit $v8m4 +... 
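+
+# A rough sketch of the nxv1i16-into-nxv8i16 case above (illustrative types,
+# not autogenerated output): the LMUL=1 register slice holding the insert
+# point is extracted, updated with a whole-register move whose VL equals the
+# subvector's element count (no slide is needed since the remainder index
+# within the slice is 0), and inserted back:
+#   %slice:_(<vscale x 4 x s16>) = G_EXTRACT_SUBVECTOR %big(<vscale x 8 x s16>), 4
+#   %merge:_(<vscale x 4 x s16>) = G_VMV_V_V_VL %slice, %tmp(<vscale x 4 x s16>), %vl(s64)
+#   %res:_(<vscale x 8 x s16>) = G_INSERT_SUBVECTOR %big, %merge(<vscale x 4 x s16>), 4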
+--- +name: insert_subvector_nxv2i64_nxv8i64 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8m8 + ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64 + ; CHECK: liveins: $v8m8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_() = G_INSERT_SUBVECTOR [[COPY]], [[DEF]](), 4 + ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8m8 + %1:_() = G_IMPLICIT_DEF + %2:_() = G_INSERT_SUBVECTOR %0(), %1, 4 + $v8 = COPY %2() + PseudoRET implicit $v8 +...
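+
+# A rough note on the two register-aligned cases above (illustrative): when
+# the decomposed index lands exactly on a vector register boundary and the
+# subvector spans whole registers, the G_INSERT_SUBVECTOR is left intact,
+# e.g. an LMUL=2 subvector inserted into an LMUL=8 vector at index 4:
+#   %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s64>), %1, 4
+# and is expected to be resolved with subregister manipulation during
+# selection.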