diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 03099e9ad44dc..1749ac1770da9 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3234,7 +3234,7 @@ class TargetLoweringBase {
 
   /// Lower a deinterleave intrinsic to a target specific load intrinsic.
   /// Return true on success. Currently only supports
-  /// llvm.vector.deinterleave2
+  /// llvm.vector.deinterleave{2,3,5,7}
   ///
   /// \p LI is the accompanying load instruction.
   /// \p DeinterleaveValues contains the deinterleaved values.
@@ -3246,7 +3246,7 @@ class TargetLoweringBase {
 
   /// Lower an interleave intrinsic to a target specific store intrinsic.
   /// Return true on success. Currently only supports
-  /// llvm.vector.interleave2
+  /// llvm.vector.interleave{2,3,5,7}
   ///
   /// \p SI is the accompanying store instruction
   /// \p InterleaveValues contains the interleaved values.
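For reference, the intrinsics named in the updated doc comments take or produce all of their lanes in a single operation rather than composing factor-2 steps. A minimal sketch of the factor-3 forms (function names and types here are mine, chosen only for illustration):

```llvm
; Deinterleave: one call returns all three lanes as a literal struct.
define { <8 x i8>, <8 x i8>, <8 x i8> } @deinterleave3_sketch(<24 x i8> %wide) {
  %d = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave3(<24 x i8> %wide)
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %d
}

; Interleave: three operands, one wide result, no intermediate tree.
define <24 x i8> @interleave3_sketch(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
  %i = call <24 x i8> @llvm.vector.interleave3(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
  ret <24 x i8> %i
}
```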
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 04d89d61cb6a9..960c7956e0011 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -571,6 +571,25 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
   return true;
 }
 
+static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::vector_deinterleave2:
+  case Intrinsic::vector_interleave2:
+    return 2;
+  case Intrinsic::vector_deinterleave3:
+  case Intrinsic::vector_interleave3:
+    return 3;
+  case Intrinsic::vector_deinterleave5:
+  case Intrinsic::vector_interleave5:
+    return 5;
+  case Intrinsic::vector_deinterleave7:
+  case Intrinsic::vector_interleave7:
+    return 7;
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+}
+
 // For an (de)interleave tree like this:
 //
 //   A C B D
@@ -586,7 +605,7 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
 // to reorder them by interleaving these values.
 static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
   unsigned NumLeaves = SubLeaves.size();
-  if (NumLeaves == 2)
+  if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves))
     return;
 
   assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
@@ -608,7 +627,10 @@ static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
 static bool
 getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
                           SmallVectorImpl<Instruction *> &DeadInsts) {
-  assert(II->getIntrinsicID() == Intrinsic::vector_interleave2);
+  assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 ||
+         II->getIntrinsicID() == Intrinsic::vector_interleave3 ||
+         II->getIntrinsicID() == Intrinsic::vector_interleave5 ||
+         II->getIntrinsicID() == Intrinsic::vector_interleave7);
 
   // Visit with BFS
   SmallVector<IntrinsicInst *> Queue;
@@ -620,7 +642,7 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
     // All the intermediate intrinsics will be deleted.
     DeadInsts.push_back(Current);
 
-    for (unsigned I = 0; I < 2; ++I) {
+    for (unsigned I = 0; I < getIntrinsicFactor(Current); ++I) {
       Value *Op = Current->getOperand(I);
       if (auto *OpII = dyn_cast<IntrinsicInst>(Op))
         if (OpII->getIntrinsicID() == Intrinsic::vector_interleave2) {
@@ -638,9 +660,10 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
   }
 
   const unsigned Factor = Operands.size();
-  // Currently we only recognize power-of-two factors.
+  // Currently we only recognize factors of 3, 5, 7, and powers of 2.
   // FIXME: should we assert here instead?
-  if (Factor <= 1 || !isPowerOf2_32(Factor))
+  if (Factor <= 1 ||
+      (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
     return false;
 
   interleaveLeafValues(Operands);
@@ -651,9 +674,12 @@ static bool
 getVectorDeinterleaveFactor(IntrinsicInst *II,
                             SmallVectorImpl<Value *> &Results,
                             SmallVectorImpl<Instruction *> &DeadInsts) {
-  assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2);
+  assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 ||
+         II->getIntrinsicID() == Intrinsic::vector_deinterleave3 ||
+         II->getIntrinsicID() == Intrinsic::vector_deinterleave5 ||
+         II->getIntrinsicID() == Intrinsic::vector_deinterleave7);
   using namespace PatternMatch;
-  if (!II->hasNUses(2))
+  if (!II->hasNUses(getIntrinsicFactor(II)))
     return false;
 
   // Visit with BFS
@@ -662,12 +688,12 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
   while (!Queue.empty()) {
     IntrinsicInst *Current = Queue.front();
     Queue.erase(Queue.begin());
-    assert(Current->hasNUses(2));
+    assert(Current->hasNUses(getIntrinsicFactor(Current)));
 
     // All the intermediate intrinsics will be deleted from the bottom-up.
     DeadInsts.insert(DeadInsts.begin(), Current);
 
-    ExtractValueInst *LHS = nullptr, *RHS = nullptr;
+    SmallVector<ExtractValueInst *> EVs(getIntrinsicFactor(Current), nullptr);
     for (User *Usr : Current->users()) {
       if (!isa<ExtractValueInst>(Usr))
         return 0;
@@ -679,17 +705,15 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
       if (Indices.size() != 1)
         return false;
 
-      if (Indices[0] == 0 && !LHS)
-        LHS = EV;
-      else if (Indices[0] == 1 && !RHS)
-        RHS = EV;
+      if (!EVs[Indices[0]])
+        EVs[Indices[0]] = EV;
       else
         return false;
     }
 
     // We have legal indices. At this point we're either going
     // to continue the traversal or push the leaf values into Results.
-    for (ExtractValueInst *EV : {LHS, RHS}) {
+    for (ExtractValueInst *EV : EVs) {
       // Continue the traversal. We're playing safe here and matching only the
       // expression consisting of a perfectly balanced binary tree in which all
       // intermediate values are only used once.
@@ -713,9 +737,10 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
   }
 
   const unsigned Factor = Results.size();
-  // Currently we only recognize power-of-two factors.
+  // Currently we only recognize factors of 3, 5, 7, and powers of 2.
   // FIXME: should we assert here instead?
-  if (Factor <= 1 || !isPowerOf2_32(Factor))
+  if (Factor <= 1 ||
+      (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
     return 0;
 
   interleaveLeafValues(Results);
@@ -878,11 +903,23 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
 
     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
       // At present, we only have intrinsics to represent (de)interleaving
-      // with a factor of 2.
-      if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
+      // with a factor of 2, 3, 5, and 7.
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::vector_deinterleave2:
+      case Intrinsic::vector_deinterleave3:
+      case Intrinsic::vector_deinterleave5:
+      case Intrinsic::vector_deinterleave7:
         Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
-      else if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
+        break;
+      case Intrinsic::vector_interleave2:
+      case Intrinsic::vector_interleave3:
+      case Intrinsic::vector_interleave5:
+      case Intrinsic::vector_interleave7:
         Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
+        break;
+      default:
+        break;
+      }
     }
   }
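To make the matcher change concrete, here is a rough IR-level contrast of the two shapes getVectorInterleaveFactor now accepts (my own example, not taken from the patch). Power-of-two factors are still matched as balanced trees of interleave2 whose leaves are collected in the A C B D order that interleaveLeafValues then fixes up; factors 3, 5, and 7 must come from a single intrinsic call, so the leaves are already in order and the reordering early-returns. Mixing the two shapes, e.g. an interleave3 whose operands are themselves interleave2 calls (factor 6), is rejected by the Factor check above.

```llvm
; Factor 4: a balanced tree of interleave2 calls (leaves collected as a, c, b, d).
define void @factor4_tree(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
  %lo = call <8 x i32> @llvm.vector.interleave2(<4 x i32> %a, <4 x i32> %c)
  %hi = call <8 x i32> @llvm.vector.interleave2(<4 x i32> %b, <4 x i32> %d)
  %v = call <16 x i32> @llvm.vector.interleave2(<8 x i32> %lo, <8 x i32> %hi)
  store <16 x i32> %v, ptr %p
  ret void
}

; Factor 3: a single flat intrinsic call, newly recognized by this patch.
define void @factor3_flat(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ptr %p) {
  %v = call <12 x i32> @llvm.vector.interleave3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  store <12 x i32> %v, ptr %p
  ret void
}
```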
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index e53dfc23a84bb..31529b1783651 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -257,6 +257,23 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p
   ret {<2 x double>, <2 x double>} %res1
 }
 
+define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vlseg3e8.v v6, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <24 x i8>, ptr %p
+  %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec)
+  %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0
+  %t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1
+  %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2
+  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+  ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2
+}
+
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4:
 ; CHECK:       # %bb.0:
@@ -281,6 +298,52 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
 }
 
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vlseg5e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <40 x i8>, ptr %p
+  %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave5(<40 x i8> %vec)
+  %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
+  %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
+  %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
+  %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
+  %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
+  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+  %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
+  %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
+  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4
+}
%res4 +} + +define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) { +; CHECK-LABEL: vector_deinterleave_load_factor7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlseg7e8.v v8, (a0) +; CHECK-NEXT: ret + %vec = load <56 x i8>, ptr %p + %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave7(<56 x i8> %vec) + %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0 + %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1 + %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2 + %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3 + %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4 + %t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5 + %t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6 + %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0 + %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1 + %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2 + %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3 + %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4 + %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t5, 5 + %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t6, 6 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6 +} + define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) { ; CHECK-LABEL: vector_deinterleave_load_factor8: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll index 26c3db6131034..8244db45a7ef2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -181,6 +181,17 @@ define void @vector_interleave_store_v4f64_v2f64(<2 x double> %a, <2 x double> % ret void } +define void @vector_interleave_store_factor3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ptr %p) { +; CHECK-LABEL: vector_interleave_store_factor3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg3e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call <12 x i32> @llvm.vector.interleave3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) + store <12 x i32> %v, ptr %p + ret void +} + define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) { ; CHECK-LABEL: vector_interleave_store_factor4: ; CHECK: # %bb.0: @@ -194,6 +205,28 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3 ret void } +define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, ptr %p) { +; 
+; CHECK-LABEL: vector_interleave_store_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg5e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <20 x i32> @llvm.vector.interleave5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e)
+  store <20 x i32> %v, ptr %p
+  ret void
+}
+
+define void @vector_interleave_store_factor7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_factor7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg7e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <28 x i32> @llvm.vector.interleave7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g)
+  store <28 x i32> %v, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_factor8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 582aef908964a..0483bbbd35b39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -344,6 +344,23 @@ define {<vscale x 2 x ptr>, <vscale x 2 x ptr>} @vector_deinterleave_load_nxv2p0
   ret {<vscale x 2 x ptr>, <vscale x 2 x ptr>} %res1
 }
 
+define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor3(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlseg3e8.v v6, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 24 x i8>, ptr %p
+  %d0 = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave3(<vscale x 24 x i8> %vec)
+  %t0 = extractvalue {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %d0, 0
+  %t1 = extractvalue {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %d0, 1
+  %t2 = extractvalue {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %d0, 2
+  %res0 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } poison, <vscale x 8 x i8> %t0, 0
+  %res1 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res0, <vscale x 8 x i8> %t1, 1
+  %res2 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res1, <vscale x 8 x i8> %t2, 2
+  ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res2
+}
+
 define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor4(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4:
 ; CHECK:       # %bb.0:
@@ -368,6 +385,52 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
   ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res3
 }
 
+define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor5(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlseg5e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 40 x i8>, ptr %p
+  %d0 = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave5(<vscale x 40 x i8> %vec)
+  %t0 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 0
+  %t1 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 1
+  %t2 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 2
+  %t3 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 3
+  %t4 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 4
+  %res0 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } poison, <vscale x 8 x i8> %t0, 0
+  %res1 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res0, <vscale x 8 x i8> %t1, 1
+  %res2 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res1, <vscale x 8 x i8> %t2, 2
+  %res3 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res2, <vscale x 8 x i8> %t3, 3
+  %res4 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res3, <vscale x 8 x i8> %t4, 4
+  ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res4
+}
+
+define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @vector_deinterleave_load_factor7(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlseg7e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 56 x i8>, ptr %p
+  %d0 = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave7(<vscale x 56 x i8> %vec)
+  %t0 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 0
+  %t1 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 1
+  %t2 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 2
+  %t3 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 3
+  %t4 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 4
+  %t5 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 5
+  %t6 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d0, 6
+  %res0 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } poison, <vscale x 8 x i8> %t0, 0
+  %res1 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res0, <vscale x 8 x i8> %t1, 1
+  %res2 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res1, <vscale x 8 x i8> %t2, 2
+  %res3 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res2, <vscale x 8 x i8> %t3, 3
+  %res4 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res3, <vscale x 8 x i8> %t4, 4
+  %res5 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res4, <vscale x 8 x i8> %t5, 5
+  %res6 = insertvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res5, <vscale x 8 x i8> %t6, 6
+  ret { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %res6
+}
+
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) {
 ; CHECK-LABEL: vector_deinterleave_load_factor8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index b5eb312bf5e18..4332ca411d91b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -239,6 +239,17 @@ define void @vector_interleave_store_nxv4p0_nxv2p0(<vscale x 2 x ptr> %a, <vsca
   ret void
 }
 
+define void @vector_interleave_store_factor3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_factor3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg3e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c)
+  store <vscale x 6 x i32> %v, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_factor4(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_factor4:
 ; CHECK:       # %bb.0:
@@ -252,6 +263,28 @@ define void @vector_interleave_store_factor4(<vscale x 2 x i32> %a, <vscale x 2
   ret void
 }
 
+define void @vector_interleave_store_factor5(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, <vscale x 2 x i32> %e, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_factor5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg5e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <vscale x 10 x i32> @llvm.vector.interleave5(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, <vscale x 2 x i32> %e)
+  store <vscale x 10 x i32> %v, ptr %p
+  ret void
+}
+
+define void @vector_interleave_store_factor7(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, <vscale x 2 x i32> %e, <vscale x 2 x i32> %f, <vscale x 2 x i32> %g, ptr %p) {
+; CHECK-LABEL: vector_interleave_store_factor7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsseg7e32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <vscale x 14 x i32> @llvm.vector.interleave7(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, <vscale x 2 x i32> %e, <vscale x 2 x i32> %f, <vscale x 2 x i32> %g)
+  store <vscale x 14 x i32> %v, ptr %p
+  ret void
+}
+
 define void @vector_interleave_store_factor8(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i32> %d, <vscale x 2 x i32> %e, <vscale x 2 x i32> %f, <vscale x 2 x i32> %g, <vscale x 2 x i32> %h, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_factor8:
 ; CHECK:       # %bb.0:
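A note on the VP tests that follow: the element count given to vlsegN/vssegN is per segment, so the pass in effect divides the total vector-predication length by the factor. The lui/addi/mulhu/srli runs in the CHECK lines are just the standard magic-number expansion of that unsigned division (multiply by 0xAAAAAAAB then shift for a divide by 3, by 0xCCCCCCCD for a divide by 5, and so on). A hedged sketch of the scalar computation being expanded (names mine):

```llvm
; %rvl is the wide element count, always a multiple of 3 in these tests;
; each of the three segment registers receives %rvl / 3 elements, which
; the backend expands into the mulhu sequences shown below.
define i32 @segment_evl_factor3(i32 %rvl) {
  %segs = udiv i32 %rvl, 3
  ret i32 %segs
}
```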
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index d0f35aa8b85e9..142ee5256f9e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -28,6 +28,44 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %
   ret { <vscale x 2 x i32>, <vscale x 2 x i32> } %res1
 }
 
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_v2(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor3_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    lui a2, 699051
+; RV32-NEXT:    addi a2, a2, -1365
+; RV32-NEXT:    mulhu a1, a1, a2
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg3e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor3_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    lui a2, 699051
+; RV64-NEXT:    addi a2, a2, -1365
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a2, a2, 32
+; RV64-NEXT:    mulhu a1, a1, a2
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg3e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 3
+  %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl)
+  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2
+}
+
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor4_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor4_v2:
 ; RV32:       # %bb.0:
@@ -63,6 +101,101 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
   ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3
 }
 
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor5_v2(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor5_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    lui a2, 838861
+; RV32-NEXT:    addi a2, a2, -819
+; RV32-NEXT:    mulhu a1, a1, a2
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg5e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor5_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    lui a2, 838861
+; RV64-NEXT:    addi a2, a2, -819
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a2, a2, 32
+; RV64-NEXT:    mulhu a1, a1, a2
+; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg5e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 5
+  %wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
+  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5(<vscale x 10 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
+  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 4
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  %res4 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3, <vscale x 2 x i32> %t4, 4
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res4
+}
+
+define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor7_v2(ptr %ptr, i32 %evl) {
+; RV32-LABEL: load_factor7_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    sub a2, a2, a1
+; RV32-NEXT:    lui a1, 149797
+; RV32-NEXT:    addi a1, a1, -1755
+; RV32-NEXT:    mulhu a1, a2, a1
+; RV32-NEXT:    sub a2, a2, a1
+; RV32-NEXT:    srli a2, a2, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV32-NEXT:    vlseg7e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: load_factor7_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    lui a3, 149797
+; RV64-NEXT:    subw a2, a2, a1
+; RV64-NEXT:    addi a1, a3, -1755
+; RV64-NEXT:    slli a3, a2, 32
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    mulhu a1, a3, a1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    subw a2, a2, a1
+; RV64-NEXT:    srliw a2, a2, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    srli a1, a1, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; RV64-NEXT:    vlseg7e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 7
+  %wide.masked.load = call <vscale x 14 x i32> @llvm.vp.load(ptr %ptr, <vscale x 14 x i1> splat (i1 true), i32 %rvl)
+  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave7(<vscale x 14 x i32> %wide.masked.load)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 2
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 3
+  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 4
+  %t5 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 5
+  %t6 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 6
+  %res0 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> %t0, 0
+  %res1 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res0, <vscale x 2 x i32> %t1, 1
+  %res2 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res1, <vscale x 2 x i32> %t2, 2
+  %res3 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res2, <vscale x 2 x i32> %t3, 3
+  %res4 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res3, <vscale x 2 x i32> %t4, 4
+  %res5 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res4, <vscale x 2 x i32> %t5, 5
+  %res6 = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res5, <vscale x 2 x i32> %t6, 6
+  ret { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %res6
+}
+
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor8_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor8_v2:
 ; RV32:       # %bb.0:
@@ -137,6 +270,38 @@ define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
   ret void
 }
 
+define void @store_factor3_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, ptr %ptr, i32 %evl) {
+; RV32-LABEL: store_factor3_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    lui a2, 699051
+; RV32-NEXT:    addi a2, a2, -1365
+; RV32-NEXT:    mulhu a1, a1, a2
+; RV32-NEXT:    srli a1, a1, 1
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vsseg3e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor3_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    lui a2, 699051
+; RV64-NEXT:    addi a2, a2, -1365
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a2, a2, 32
+; RV64-NEXT:    mulhu a1, a1, a2
+; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vsseg3e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 3
+  %interleaved.vec = call <vscale x 3 x i32> @llvm.vector.interleave3(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2)
+  call void @llvm.vp.store(<vscale x 3 x i32> %interleaved.vec, ptr %ptr, <vscale x 3 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
 define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor4_v2:
 ; RV32:       # %bb.0:
@@ -165,6 +330,77 @@ define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
   ret void
 }
 
+define void @store_factor5_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, ptr %ptr, i32 %evl) {
+; RV32-LABEL: store_factor5_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    lui a2, 838861
+; RV32-NEXT:    addi a2, a2, -819
+; RV32-NEXT:    mulhu a1, a1, a2
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vsseg5e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor5_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    lui a2, 838861
+; RV64-NEXT:    addi a2, a2, -819
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    slli a2, a2, 32
+; RV64-NEXT:    mulhu a1, a1, a2
+; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vsseg5e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 5
+  %interleaved.vec = call <vscale x 5 x i32> @llvm.vector.interleave5(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4)
+  call void @llvm.vp.store(<vscale x 5 x i32> %interleaved.vec, ptr %ptr, <vscale x 5 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
+define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, <vscale x 1 x i32> %v5, <vscale x 1 x i32> %v6, ptr %ptr, i32 %evl) {
+; RV32-LABEL: store_factor7_v2:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    sub a2, a2, a1
+; RV32-NEXT:    lui a1, 149797
+; RV32-NEXT:    addi a1, a1, -1755
+; RV32-NEXT:    mulhu a1, a2, a1
+; RV32-NEXT:    sub a2, a2, a1
+; RV32-NEXT:    srli a2, a2, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    srli a1, a1, 2
+; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV32-NEXT:    vsseg7e32.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_factor7_v2:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    lui a3, 149797
+; RV64-NEXT:    subw a2, a2, a1
+; RV64-NEXT:    addi a1, a3, -1755
+; RV64-NEXT:    slli a3, a2, 32
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    mulhu a1, a3, a1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    subw a2, a2, a1
+; RV64-NEXT:    srliw a2, a2, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    srli a1, a1, 2
+; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; RV64-NEXT:    vsseg7e32.v v8, (a0)
+; RV64-NEXT:    ret
+  %rvl = mul i32 %evl, 7
+  %interleaved.vec = call <vscale x 7 x i32> @llvm.vector.interleave7(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, <vscale x 1 x i32> %v5, <vscale x 1 x i32> %v6)
+  call void @llvm.vp.store(<vscale x 7 x i32> %interleaved.vec, ptr %ptr, <vscale x 7 x i1> splat (i1 true), i32 %rvl)
+  ret void
+}
+
 define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor8_v2:
 ; RV32:       # %bb.0:
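The remaining checks exercise the pass's IR-to-IR output directly. On RISC-V, a scalable factor-N deinterleave of a unit-stride load becomes a call to the @llvm.riscv.vlsegN intrinsic, which returns a target("riscv.vector.tuple", ...) value whose fields are read back with @llvm.riscv.tuple.extract; stores mirror this with @llvm.riscv.tuple.insert and @llvm.riscv.vssegN. A condensed before-the-pass sketch for factor 3 (function name mine), matching the first test below:

```llvm
; Input IR: the pass replaces the load + deinterleave3 with a vlseg3 tuple
; load plus three tuple.extract calls, as the RV32/RV64 CHECK lines show.
define <vscale x 2 x i32> @load_factor3_sketch(ptr %p) {
  %wide = load <vscale x 6 x i32>, ptr %p
  %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide)
  ; All three fields must be extracted for the matcher to fire (it checks
  ; that the deinterleave has exactly getIntrinsicFactor uses).
  %f0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
  %f1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 1
  %f2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 2
  ret <vscale x 2 x i32> %f0
}
```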
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
index f2e2950992421..bd79ec9a09599 100644
--- a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
@@ -73,6 +73,41 @@ define void @load_factor3(ptr %ptr) {
   ret void
 }
 
+define void @load_factor3_vscale(ptr %ptr) {
+; RV32-LABEL: @load_factor3_vscale(
+; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t.i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) poison, ptr [[PTR:%.*]], i32 -1, i32 5)
+; RV32-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], i32 0)
+; RV32-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP2]], 0
+; RV32-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], i32 1)
+; RV32-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]], <vscale x 2 x i32> [[TMP4]], 1
+; RV32-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], i32 2)
+; RV32-NEXT:    [[V:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP5]], <vscale x 2 x i32> [[TMP6]], 2
+; RV32-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 0
+; RV32-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 1
+; RV32-NEXT:    [[T2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 2
+; RV32-NEXT:    ret void
+;
+; RV64-LABEL: @load_factor3_vscale(
+; RV64-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) poison, ptr [[PTR:%.*]], i64 -1, i64 5)
+; RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], i32 0)
+; RV64-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP2]], 0
+; RV64-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], i32 1)
+; RV64-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]], <vscale x 2 x i32> [[TMP4]], 1
+; RV64-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], i32 2)
+; RV64-NEXT:    [[V:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP5]], <vscale x 2 x i32> [[TMP6]], 2
+; RV64-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 0
+; RV64-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 1
+; RV64-NEXT:    [[T2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 2
+; RV64-NEXT:    ret void
+;
+  %interleaved.vec = load <vscale x 6 x i32>, ptr %ptr
+  %v = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> %interleaved.vec)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 1
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 2
+  ret void
+}
+
 define void @load_factor4(ptr %ptr) {
 ; RV32-LABEL: @load_factor4(
 ; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
@@ -172,6 +207,55 @@ define void @load_factor5(ptr %ptr) {
   ret void
 }
 
+define void @load_factor5_vscale(ptr %ptr) {
+; RV32-LABEL: @load_factor5_vscale(
+; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t.i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) poison, ptr [[PTR:%.*]], i32 -1, i32 5)
+; RV32-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 0)
+; RV32-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP2]], 0
+; RV32-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 1)
+; RV32-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]], <vscale x 2 x i32> [[TMP4]], 1
+; RV32-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 2)
+; RV32-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP5]], <vscale x 2 x i32> [[TMP6]], 2
+; RV32-NEXT:    [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 3)
+; RV32-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP7]], <vscale x 2 x i32> [[TMP8]], 3
+; RV32-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 4)
+; RV32-NEXT:    [[V:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP9]], <vscale x 2 x i32> [[TMP10]], 4
+; RV32-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 0
+; RV32-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 1
+; RV32-NEXT:    [[T2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 2
+; RV32-NEXT:    [[T3:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 3
+; RV32-NEXT:    [[T4:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 4
+; RV32-NEXT:    ret void
+;
+; RV64-LABEL: @load_factor5_vscale(
+; RV64-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) poison, ptr [[PTR:%.*]], i64 -1, i64 5)
+; RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 0)
+; RV64-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP2]], 0
+; RV64-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 1)
+; RV64-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]], <vscale x 2 x i32> [[TMP4]], 1
+; RV64-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 2)
+; RV64-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP5]], <vscale x 2 x i32> [[TMP6]], 2
+; RV64-NEXT:    [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 3)
+; RV64-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP7]], <vscale x 2 x i32> [[TMP8]], 3
+; RV64-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], i32 4)
+; RV64-NEXT:    [[V:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP9]], <vscale x 2 x i32> [[TMP10]], 4
+; RV64-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 0
+; RV64-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 1
+; RV64-NEXT:    [[T2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 2
+; RV64-NEXT:    [[T3:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 3
+; RV64-NEXT:    [[T4:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 4
+; RV64-NEXT:    ret void
+;
+  %interleaved.vec = load <vscale x 10 x i32>, ptr %ptr
+  %v = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5.nxv10i32(<vscale x 10 x i32> %interleaved.vec)
+  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 0
+  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 1
+  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 2
+  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 3
+  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 4
+  ret void
+}
+
 define void @load_factor6(ptr %ptr) {
 ; RV32-LABEL: @load_factor6(
 ; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg6.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
@@ -237,6 +321,69 @@ define void @load_factor7(ptr %ptr) {
   ret void
 }
 
+define void @load_factor7_vscale(ptr %ptr) {
+; RV32-LABEL: @load_factor7_vscale(
+; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t.i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) poison, ptr [[PTR:%.*]], i32 -1, i32 5)
+; RV32-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 0)
+; RV32-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP2]], 0
+; RV32-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 1)
+; RV32-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]], <vscale x 2 x i32> [[TMP4]], 1
+; RV32-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 2)
+; RV32-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP5]], <vscale x 2 x i32> [[TMP6]], 2
+; RV32-NEXT:    [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 3)
+; RV32-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP7]], <vscale x 2 x i32> [[TMP8]], 3
+; RV32-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 4)
+; RV32-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP9]], <vscale x 2 x i32> [[TMP10]], 4
+; RV32-NEXT:    [[TMP12:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 5)
+; RV32-NEXT:    [[TMP13:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP11]], <vscale x 2 x i32> [[TMP12]], 5
+; RV32-NEXT:    [[TMP14:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 6)
+; RV32-NEXT:    [[V:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP13]], <vscale x 2 x i32> [[TMP14]], 6
+; RV32-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 0
+; RV32-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 1
+; RV32-NEXT:    [[T2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 2
+; RV32-NEXT:    [[T3:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 3
+; RV32-NEXT:    [[T4:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 4
+; RV32-NEXT:    [[T5:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 5
+; RV32-NEXT:    [[T6:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 6
+; RV32-NEXT:    ret void
+;
+; RV64-LABEL: @load_factor7_vscale(
+; RV64-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) poison, ptr [[PTR:%.*]], i64 -1, i64 5)
+; RV64-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i32>
 @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 0)
; RV64-NEXT:    [[TMP3:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } poison, <vscale x 2 x i32> [[TMP2]], 0
; RV64-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 1)
; RV64-NEXT:    [[TMP5:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]], <vscale x 2 x i32> [[TMP4]], 1
; RV64-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 2)
; RV64-NEXT:    [[TMP7:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP5]], <vscale x 2 x i32> [[TMP6]], 2
; RV64-NEXT:    [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 3)
; RV64-NEXT:    [[TMP9:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP7]], <vscale x 2 x i32> [[TMP8]], 3
; RV64-NEXT:    [[TMP10:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 4)
; RV64-NEXT:    [[TMP11:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP9]], <vscale x 2 x i32> [[TMP10]], 4
; RV64-NEXT:    [[TMP12:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 5)
; RV64-NEXT:    [[TMP13:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP11]], <vscale x 2 x i32> [[TMP12]], 5
; RV64-NEXT:    [[TMP14:%.*]] = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], i32 6)
; RV64-NEXT:    [[V:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP13]], <vscale x 2 x i32> [[TMP14]], 6
; RV64-NEXT:    [[T0:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 0
; RV64-NEXT:    [[T1:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 1
; RV64-NEXT:    [[T2:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 2
; RV64-NEXT:    [[T3:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 3
; RV64-NEXT:    [[T4:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 4
; RV64-NEXT:    [[T5:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 5
; RV64-NEXT:    [[T6:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[V]], 6
; RV64-NEXT:    ret void
;
  %interleaved.vec = load <vscale x 14 x i32>, ptr %ptr
  %v = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave7.nxv14i32(<vscale x 14 x i32> %interleaved.vec)
  %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 0
  %t1 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 1
  %t2 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 2
  %t3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 3
  %t4 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 4
  %t5 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 5
  %t6 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %v, 6
  ret void
}

 define void @load_factor8(ptr %ptr) {
 ; RV32-LABEL: @load_factor8(
 ; RV32-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg8.load.mask.v4i32.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4)
@@ -421,6 +568,26 @@ define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2
   ret void
 }
 
+define void @store_factor3_vscale(ptr %ptr, <vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1, <vscale x 8 x i8> %v2) {
+; RV32-LABEL: @store_factor3_vscale(
+; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
+; RV32-NEXT:    [[TMP2:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], <vscale x 8 x i8> [[V1:%.*]], i32 1)
+; RV32-NEXT:    [[TMP3:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP2]], <vscale x 8 x i8> [[V2:%.*]], i32 2)
+; RV32-NEXT:    call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv8i8_3t.i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP3]], ptr [[PTR:%.*]], i32 -1, i32 3)
+; RV32-NEXT:    ret void
+;
+; RV64-LABEL: @store_factor3_vscale(
+; RV64-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
+; RV64-NEXT:    [[TMP2:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP1]], <vscale x 8 x i8> [[V1:%.*]], i32 1)
+; RV64-NEXT:    [[TMP3:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_3t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP2]], <vscale x 8 x i8> [[V2:%.*]], i32 2)
+; RV64-NEXT:    call void @llvm.riscv.vsseg3.triscv.vector.tuple_nxv8i8_3t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) [[TMP3]], ptr [[PTR:%.*]], i64 -1, i64 3)
+; RV64-NEXT:    ret void
+;
+  %interleaved.vec = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv8i8(<vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1, <vscale x 8 x i8> %v2)
+  store <vscale x 24 x i8> %interleaved.vec, ptr %ptr
+  ret void
+}
+
 define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
 ; RV32-LABEL: @store_factor4(
 ; RV32-NEXT:    [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -473,6 +640,29 @@ define void @store_factor4_vscale(ptr %ptr, <vscale x 8 x i8> %v0, <vscale x 8
   ret void
 }
 
+define void @store_factor5_vscale(ptr %ptr, <vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1, <vscale x 8 x i8> %v2, <vscale x 8 x i8> %v3, <vscale x 8 x i8> %v4) {
+; RV32-LABEL: @store_factor5_vscale(
+; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
+; RV32-NEXT:    [[TMP2:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], <vscale x 8 x i8> [[V1:%.*]], i32 1)
+; RV32-NEXT:    [[TMP3:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP2]], <vscale x 8 x i8> [[V2:%.*]], i32 2)
+; RV32-NEXT:    [[TMP4:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP3]], <vscale x 8 x i8> [[V3:%.*]], i32 3)
+; RV32-NEXT:    [[TMP5:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP4]], <vscale x 8 x i8> [[V4:%.*]], i32 4)
+; RV32-NEXT:    call void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv8i8_5t.i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP5]], ptr [[PTR:%.*]], i32 -1, i32 3)
+; RV32-NEXT:    ret void
+;
+; RV64-LABEL: @store_factor5_vscale(
+; RV64-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
+; RV64-NEXT:    [[TMP2:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP1]], <vscale x 8 x i8> [[V1:%.*]], i32 1)
+; RV64-NEXT:    [[TMP3:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP2]], <vscale x 8 x i8> [[V2:%.*]], i32 2)
+; RV64-NEXT:    [[TMP4:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP3]], <vscale x 8 x i8> [[V3:%.*]], i32 3)
+; RV64-NEXT:    [[TMP5:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 5)
 @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_5t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP4]], <vscale x 8 x i8> [[V4:%.*]], i32 4)
; RV64-NEXT:    call void @llvm.riscv.vsseg5.triscv.vector.tuple_nxv8i8_5t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) [[TMP5]], ptr [[PTR:%.*]], i64 -1, i64 3)
; RV64-NEXT:    ret void
;
  %interleaved.vec = call <vscale x 40 x i8> @llvm.vector.interleave5.nxv8i8(<vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1, <vscale x 8 x i8> %v2, <vscale x 8 x i8> %v3, <vscale x 8 x i8> %v4)
  store <vscale x 40 x i8> %interleaved.vec, ptr %ptr
  ret void
}

 define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) {
 ; RV32-LABEL: @store_factor2_wide(
@@ -546,6 +736,34 @@ define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32
   ret void
 }
 
+define void @store_factor7_vscale(ptr %ptr, <vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1, <vscale x 8 x i8> %v2, <vscale x 8 x i8> %v3, <vscale x 8 x i8> %v4, <vscale x 8 x i8> %v5, <vscale x 8 x i8> %v6) {
+; RV32-LABEL: @store_factor7_vscale(
+; RV32-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
+; RV32-NEXT:    [[TMP2:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], <vscale x 8 x i8> [[V1:%.*]], i32 1)
+; RV32-NEXT:    [[TMP3:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP2]], <vscale x 8 x i8> [[V2:%.*]], i32 2)
+; RV32-NEXT:    [[TMP4:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP3]], <vscale x 8 x i8> [[V3:%.*]], i32 3)
+; RV32-NEXT:    [[TMP5:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP4]], <vscale x 8 x i8> [[V4:%.*]], i32 4)
+; RV32-NEXT:    [[TMP6:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP5]], <vscale x 8 x i8> [[V5:%.*]], i32 5)
+; RV32-NEXT:    [[TMP7:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP6]], <vscale x 8 x i8> [[V6:%.*]], i32 6)
+; RV32-NEXT:    call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv8i8_7t.i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP7]], ptr [[PTR:%.*]], i32 -1, i32 3)
+; RV32-NEXT:    ret void
+;
+; RV64-LABEL: @store_factor7_vscale(
+; RV64-NEXT:    [[TMP1:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) poison, <vscale x 8 x i8> [[V0:%.*]], i32 0)
+; RV64-NEXT:    [[TMP2:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP1]], <vscale x 8 x i8> [[V1:%.*]], i32 1)
+; RV64-NEXT:    [[TMP3:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP2]], <vscale x 8 x i8> [[V2:%.*]], i32 2)
+; RV64-NEXT:    [[TMP4:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP3]], <vscale x 8 x i8> [[V3:%.*]], i32 3)
+; RV64-NEXT:    [[TMP5:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP4]], <vscale x 8 x i8> [[V4:%.*]], i32 4)
+; RV64-NEXT:    [[TMP6:%.*]] = call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) [[TMP5]], <vscale x 8 x i8> [[V5:%.*]], i32 5)
+; RV64-NEXT:    [[TMP7:%.*]] = call
target("riscv.vector.tuple", , 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", , 7) [[TMP6]], [[V6:%.*]], i32 6) +; RV64-NEXT: call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv8i8_7t.i64(target("riscv.vector.tuple", , 7) [[TMP7]], ptr [[PTR:%.*]], i64 -1, i64 3) +; RV64-NEXT: ret void +; + %interleaved.vec = call @llvm.vector.interleave7.nxv8i8( %v0, %v1, %v2, %v3, %v4, %v5, %v6) + store %interleaved.vec, ptr %ptr + ret void +} + define void @store_factor8_vscale(ptr %ptr, %v0, %v1, %v2, %v3) { ; RV32-LABEL: @store_factor8_vscale( ; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0)