diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 960c7956e00119..49f1504d244ed2 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -571,6 +571,36 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
   return true;
 }
 
+static bool isInterleaveIntrinsic(Intrinsic::ID IID) {
+  switch (IID) {
+  case Intrinsic::vector_interleave2:
+  case Intrinsic::vector_interleave3:
+  case Intrinsic::vector_interleave4:
+  case Intrinsic::vector_interleave5:
+  case Intrinsic::vector_interleave6:
+  case Intrinsic::vector_interleave7:
+  case Intrinsic::vector_interleave8:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static bool isDeinterleaveIntrinsic(Intrinsic::ID IID) {
+  switch (IID) {
+  case Intrinsic::vector_deinterleave2:
+  case Intrinsic::vector_deinterleave3:
+  case Intrinsic::vector_deinterleave4:
+  case Intrinsic::vector_deinterleave5:
+  case Intrinsic::vector_deinterleave6:
+  case Intrinsic::vector_deinterleave7:
+  case Intrinsic::vector_deinterleave8:
+    return true;
+  default:
+    return false;
+  }
+}
+
 static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
   switch (II->getIntrinsicID()) {
   case Intrinsic::vector_deinterleave2:
@@ -579,12 +609,21 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
   case Intrinsic::vector_deinterleave3:
   case Intrinsic::vector_interleave3:
     return 3;
+  case Intrinsic::vector_deinterleave4:
+  case Intrinsic::vector_interleave4:
+    return 4;
   case Intrinsic::vector_deinterleave5:
   case Intrinsic::vector_interleave5:
     return 5;
+  case Intrinsic::vector_deinterleave6:
+  case Intrinsic::vector_interleave6:
+    return 6;
   case Intrinsic::vector_deinterleave7:
   case Intrinsic::vector_interleave7:
     return 7;
+  case Intrinsic::vector_deinterleave8:
+  case Intrinsic::vector_interleave8:
+    return 8;
   default:
     llvm_unreachable("Unexpected intrinsic");
   }
@@ -605,10 +644,9 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
 // to reorder them by interleaving these values.
 static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
   unsigned NumLeaves = SubLeaves.size();
-  if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves))
-    return;
-
-  assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
+  if (NumLeaves == 2)
+    return;
 
   const unsigned HalfLeaves = NumLeaves / 2;
   // Visit the sub-trees.
@@ -627,10 +665,7 @@ static bool
 getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
                           SmallVectorImpl<Instruction *> &DeadInsts) {
-  assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 ||
-         II->getIntrinsicID() == Intrinsic::vector_interleave3 ||
-         II->getIntrinsicID() == Intrinsic::vector_interleave5 ||
-         II->getIntrinsicID() == Intrinsic::vector_interleave7);
+  assert(isInterleaveIntrinsic(II->getIntrinsicID()));
 
   // Visit with BFS
   SmallVector<IntrinsicInst *> Queue;
@@ -660,13 +695,17 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
   }
 
   const unsigned Factor = Operands.size();
-  // Currently we only recognize factors of 3, 5, 7, and powers of 2.
+  // Currently we only recognize factors of 2 to 8 and larger powers of 2.
   // FIXME: should we assert here instead?
   if (Factor <= 1 ||
       (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
     return false;
 
-  interleaveLeafValues(Operands);
+  // Recursively interleaved factors need to have their values reordered.
+  // TODO: Remove once the loop vectorizer no longer recursively interleaves
+  // factors 4 and 8.
+  if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
+    interleaveLeafValues(Operands);
   return true;
 }
 
@@ -674,10 +713,7 @@ static bool
 getVectorDeinterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Results,
                             SmallVectorImpl<Instruction *> &DeadInsts) {
-  assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 ||
-         II->getIntrinsicID() == Intrinsic::vector_deinterleave3 ||
-         II->getIntrinsicID() == Intrinsic::vector_deinterleave5 ||
-         II->getIntrinsicID() == Intrinsic::vector_deinterleave7);
+  assert(isDeinterleaveIntrinsic(II->getIntrinsicID()));
   using namespace PatternMatch;
   if (!II->hasNUses(getIntrinsicFactor(II)))
     return false;
@@ -737,13 +773,17 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
   }
 
   const unsigned Factor = Results.size();
-  // Currently we only recognize factors of 3, 5, 7, and powers of 2.
+  // Currently we only recognize factors of 2 to 8 and larger powers of 2.
   // FIXME: should we assert here instead?
   if (Factor <= 1 ||
       (!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
     return 0;
 
-  interleaveLeafValues(Results);
+  // Recursively interleaved factors need to have their values reordered.
+  // TODO: Remove once the loop vectorizer no longer recursively interleaves
+  // factors 4 and 8.
+  if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
+    interleaveLeafValues(Results);
   return true;
 }
 
@@ -902,24 +942,10 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
       Changed |= lowerInterleavedStore(&I, DeadInsts);
 
     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
-      // At present, we only have intrinsics to represent (de)interleaving
-      // with a factor of 2,3,5 and 7.
-      switch (II->getIntrinsicID()) {
-      case Intrinsic::vector_deinterleave2:
-      case Intrinsic::vector_deinterleave3:
-      case Intrinsic::vector_deinterleave5:
-      case Intrinsic::vector_deinterleave7:
+      if (isDeinterleaveIntrinsic(II->getIntrinsicID()))
         Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
-        break;
-      case Intrinsic::vector_interleave2:
-      case Intrinsic::vector_interleave3:
-      case Intrinsic::vector_interleave5:
-      case Intrinsic::vector_interleave7:
+      else if (isInterleaveIntrinsic(II->getIntrinsicID()))
         Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
-        break;
-      default:
-        break;
-      }
     }
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 31529b17836512..c2ae1ce491389b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -279,6 +279,26 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <32 x i8>, ptr %p
+  %d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave4(<32 x i8> %vec)
+  %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
+  %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
+  %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
+  %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
+  %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
+  %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
+  %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
+  %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
+  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
+}
+
+; TODO: Remove once recursive deinterleaving support is removed
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4_recursive(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor4_recursive:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %vec = load <32 x i8>, ptr %p
   %d0 = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec)
@@ -319,6 +339,29 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave
   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4
 }
 
+define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor6(ptr %p) {
+; CHECK-LABEL: vector_deinterleave_load_factor6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vlseg6e8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %vec = load <48 x i8>, ptr %p
+  %d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave6(<48 x i8> %vec)
+  %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
+  %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
+  %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
+  %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
+  %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
+  %t5 = extractvalue { <8 x i8>, <8 x
i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5 + %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0 + %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1 + %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2 + %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3 + %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4 + %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5 +} + define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor7: ; CHECK: # %bb.0: @@ -339,14 +382,43 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2 %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3 %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4 - %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t5, 5 - %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t6, 6 + %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5 + %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6 } -define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) { +define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor8(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlseg8e8.v v8, (a0) +; CHECK-NEXT: vmv1r.v v15, v14 +; CHECK-NEXT: ret + %vec = load <64 x i8>, ptr %p + %d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave8(<64 x i8> %vec) + %t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0 + %t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1 + %t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2 + %t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3 + %t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4 + %t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5 + %t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6 + %t7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 7 + %res0 = insertvalue { <8 x i8>, <8 x i8>, 
<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0 + %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1 + %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2 + %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3 + %res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4 + %res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5 + %res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6 + %res7 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6, <8 x i8> %t6, 7 + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res7 +} + +; TODO: Remove once recursive deinterleaving support is removed +define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8_recursive(ptr %ptr) { +; CHECK-LABEL: vector_deinterleave_load_factor8_recursive: +; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vlseg8e32.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll index 8244db45a7ef2f..c394e7aa2e3e84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -197,6 +197,18 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vsseg4e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vector.interleave4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) + store <16 x i32> %v, ptr %p + ret void +} + +; TODO: Remove once recursive interleaving support is removed +define void @vector_interleave_store_factor4_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) { +; CHECK-LABEL: vector_interleave_store_factor4_recursive: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg4e32.v v8, (a0) ; CHECK-NEXT: ret %v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c) %v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d) @@ -216,6 +228,17 @@ define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i3 ret void } +define void @vector_interleave_store_factor6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, ptr %p) { +; CHECK-LABEL: vector_interleave_store_factor6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg6e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call <24 x i32> @llvm.vector.interleave6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f) + store <24 x i32> %v, ptr %p + ret void +} + define void @vector_interleave_store_factor7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, ptr %p) { ; CHECK-LABEL: vector_interleave_store_factor7: ; CHECK: # %bb.0: @@ -232,6 +255,18 @@ define void 
@vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vsseg8e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call <32 x i32> @llvm.vector.interleave8(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h) + store <32 x i32> %v, ptr %p + ret void +} + +; TODO: Remove once recursive interleaving support is removed +define void @vector_interleave_store_factor8_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) { +; CHECK-LABEL: vector_interleave_store_factor8_recursive: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsseg8e32.v v8, (a0) ; CHECK-NEXT: ret %v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %e) %v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %c, <4 x i32> %g) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 0483bbbd35b39b..9344c520986845 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -366,6 +366,26 @@ define { , , , , ptr %p + %d0 = call { , , , } @llvm.vector.deinterleave4( %vec) + %t0 = extractvalue { , , , } %d0, 0 + %t1 = extractvalue { , , , } %d0, 1 + %t2 = extractvalue { , , , } %d0, 2 + %t3 = extractvalue { , , , } %d0, 3 + %res0 = insertvalue { , , , } poison, %t0, 0 + %res1 = insertvalue { , , , } %res0, %t1, 1 + %res2 = insertvalue { , , , } %res1, %t2, 2 + %res3 = insertvalue { , , , } %res2, %t3, 3 + ret { , , , } %res3 +} + +; TODO: Remove once recursive deinterleaving support is removed +define { , , , } @vector_deinterleave_load_factor4_recursive(ptr %p) { +; CHECK-LABEL: vector_deinterleave_load_factor4_recursive: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vlseg4e8.v v8, (a0) ; CHECK-NEXT: ret %vec = load , ptr %p %d0 = call {, } @llvm.vector.deinterleave2.nxv32i8( %vec) @@ -406,6 +426,29 @@ define { , , , , , , , } %res4 } +define { , , , , , } @vector_deinterleave_load_factor6(ptr %p) { +; CHECK-LABEL: vector_deinterleave_load_factor6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vlseg6e8.v v8, (a0) +; CHECK-NEXT: ret + %vec = load , ptr %p + %d0 = call { , , , , , } @llvm.vector.deinterleave6( %vec) + %t0 = extractvalue { , , , , , } %d0, 0 + %t1 = extractvalue { , , , , , } %d0, 1 + %t2 = extractvalue { , , , , , } %d0, 2 + %t3 = extractvalue { , , , , , } %d0, 3 + %t4 = extractvalue { , , , , , } %d0, 4 + %t5 = extractvalue { , , , , , } %d0, 5 + %res0 = insertvalue { , , , , , } poison, %t0, 0 + %res1 = insertvalue { , , , , , } %res0, %t1, 1 + %res2 = insertvalue { , , , , , } %res1, %t2, 2 + %res3 = insertvalue { , , , , , } %res2, %t3, 3 + %res4 = insertvalue { , , , , , } %res3, %t4, 4 + %res5 = insertvalue { , , , , , } %res4, %t5, 5 + ret { , , , , , } %res5 +} + define { , , , , , , } @vector_deinterleave_load_factor7(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor7: ; CHECK: # %bb.0: @@ -426,14 +469,42 @@ define { , , , , , , , , , } %res1, %t2, 2 %res3 = insertvalue { , , , , , , } %res2, %t3, 3 %res4 = insertvalue { , , , , , , } %res3, %t4, 4 - %res5 = insertvalue { , , , , , , } %res3, %t5, 5 - %res6 = insertvalue { , , , , , , } %res3, %t6, 6 + %res5 = insertvalue { , , , , , , } %res4, %t5, 5 + 
%res6 = insertvalue { , , , , , , } %res5, %t6, 6 ret { , , , , , , } %res6 } -define {, , , , , , , } @vector_deinterleave_load_factor8(ptr %ptr) { +define { , , , , , , , } @vector_deinterleave_load_factor8(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor8: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vlseg8e8.v v8, (a0) +; CHECK-NEXT: ret + %vec = load , ptr %p + %d0 = call { , , , , , , , } @llvm.vector.deinterleave8( %vec) + %t0 = extractvalue { , , , , , , , } %d0, 0 + %t1 = extractvalue { , , , , , , , } %d0, 1 + %t2 = extractvalue { , , , , , , , } %d0, 2 + %t3 = extractvalue { , , , , , , , } %d0, 3 + %t4 = extractvalue { , , , , , , , } %d0, 4 + %t5 = extractvalue { , , , , , , , } %d0, 5 + %t6 = extractvalue { , , , , , , , } %d0, 6 + %t7 = extractvalue { , , , , , , , } %d0, 7 + %res0 = insertvalue { , , , , , , , } poison, %t0, 0 + %res1 = insertvalue { , , , , , , , } %res0, %t1, 1 + %res2 = insertvalue { , , , , , , , } %res1, %t2, 2 + %res3 = insertvalue { , , , , , , , } %res2, %t3, 3 + %res4 = insertvalue { , , , , , , , } %res3, %t4, 4 + %res5 = insertvalue { , , , , , , , } %res4, %t5, 5 + %res6 = insertvalue { , , , , , , , } %res5, %t6, 6 + %res7 = insertvalue { , , , , , , , } %res6, %t7, 7 + ret { , , , , , , , } %res7 +} + +; TODO: Remove once recursive deinterleaving support is removed +define {, , , , , , , } @vector_deinterleave_load_factor8_recursive(ptr %ptr) { +; CHECK-LABEL: vector_deinterleave_load_factor8_recursive: +; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index 4332ca411d91b1..3751967f18aa48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -250,9 +250,21 @@ define void @vector_interleave_store_factor3( %a, %a, %b, %c, %d, ptr %p) { +define void @vector_interleave_store_factor4( %a, %b, %c, %d, ptr %p) { ; CHECK-LABEL: vector_interleave_store_factor4: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg4e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call @llvm.vector.interleave4( %a, %b, %c, %d) + store %v, ptr %p + ret void +} + +; TODO: Remove once recursive interleaving support is removed +define void @vector_interleave_store_factor4_recursive( %a, %b, %c, %d, ptr %p) { +; CHECK-LABEL: vector_interleave_store_factor4_recursive: +; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vsseg4e32.v v8, (a0) ; CHECK-NEXT: ret @@ -274,6 +286,17 @@ define void @vector_interleave_store_factor5( %a, %a, %b, %c, %d, %e, %f, ptr %p) { +; CHECK-LABEL: vector_interleave_store_factor6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg6e32.v v8, (a0) +; CHECK-NEXT: ret + %v = call @llvm.vector.interleave6( %a, %b, %c, %d, %e, %f) + store %v, ptr %p + ret void +} + define void @vector_interleave_store_factor7( %a, %b, %c, %d, %e, %f, %g, ptr %p) { ; CHECK-LABEL: vector_interleave_store_factor7: ; CHECK: # %bb.0: @@ -290,6 +313,18 @@ define void @vector_interleave_store_factor8( %a, @llvm.vector.interleave8( %a, %b, %c, %d, %e, %f, %g, %h) + store %v, ptr %p + ret void +} + +; TODO: Remove once recursive interleaving support is removed +define void @vector_interleave_store_factor8_recursive( %a, %b, %c, %d, %e, %f, %g, %h, ptr %p) { +; CHECK-LABEL: 
vector_interleave_store_factor8_recursive: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vsseg8e32.v v8, (a0) ; CHECK-NEXT: ret %v0 = call @llvm.vector.interleave2.nxv4i32( %a, %e) %v1 = call @llvm.vector.interleave2.nxv4i32( %c, %g) diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll index 0a20e03d0dff1e..87b16d17aa5f0b 100644 --- a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll @@ -165,6 +165,49 @@ define void @load_factor4_vscale(ptr %ptr) { ; RV64-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP9]], 2 ; RV64-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP9]], 3 ; RV64-NEXT: ret void +; + %interleaved.vec = load , ptr %ptr + %v = call { , , , } @llvm.vector.deinterleave4.nxv16i32( %interleaved.vec) + %t0 = extractvalue { , , , } %v, 0 + %t1 = extractvalue { , , , } %v, 1 + %t2 = extractvalue { , , , } %v, 2 + %t3 = extractvalue { , , , } %v, 3 + ret void +} + +; TODO: Remove once recursive deinterleaving support is removed +define void @load_factor4_vscale_recursive(ptr %ptr) { +; RV32-LABEL: @load_factor4_vscale_recursive( +; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t.p0.i32(target("riscv.vector.tuple", , 4) poison, ptr [[PTR:%.*]], i32 -1, i32 5) +; RV32-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 0) +; RV32-NEXT: [[TMP3:%.*]] = insertvalue { , , , } poison, [[TMP2]], 0 +; RV32-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 1) +; RV32-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP3]], [[TMP4]], 1 +; RV32-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 2) +; RV32-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 2 +; RV32-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 3) +; RV32-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 3 +; RV32-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP9]], 0 +; RV32-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP9]], 1 +; RV32-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP9]], 2 +; RV32-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP9]], 3 +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor4_vscale_recursive( +; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", , 4) poison, ptr [[PTR:%.*]], i64 -1, i64 5) +; RV64-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 0) +; RV64-NEXT: [[TMP3:%.*]] = insertvalue { , , , } poison, [[TMP2]], 0 +; RV64-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 1) +; RV64-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP3]], [[TMP4]], 1 +; RV64-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 2) +; RV64-NEXT: [[TMP7:%.*]] = insertvalue { , , , } 
[[TMP5]], [[TMP6]], 2 +; RV64-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", , 4) [[TMP1]], i32 3) +; RV64-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 3 +; RV64-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[TMP9]], 0 +; RV64-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[TMP9]], 1 +; RV64-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[TMP9]], 2 +; RV64-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[TMP9]], 3 +; RV64-NEXT: ret void ; %interleaved.vec = load , ptr %ptr %d0 = call { , } @llvm.vector.deinterleave2.nxv16i32( %interleaved.vec) @@ -287,6 +330,62 @@ define void @load_factor6(ptr %ptr) { ret void } +define void @load_factor6_vscale(ptr %ptr) { +; RV32-LABEL: @load_factor6_vscale( +; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t.p0.i32(target("riscv.vector.tuple", , 6) poison, ptr [[PTR:%.*]], i32 -1, i32 5) +; RV32-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 0) +; RV32-NEXT: [[TMP3:%.*]] = insertvalue { , , , , , } poison, [[TMP2]], 0 +; RV32-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 1) +; RV32-NEXT: [[TMP5:%.*]] = insertvalue { , , , , , } [[TMP3]], [[TMP4]], 1 +; RV32-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 2) +; RV32-NEXT: [[TMP7:%.*]] = insertvalue { , , , , , } [[TMP5]], [[TMP6]], 2 +; RV32-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 3) +; RV32-NEXT: [[TMP9:%.*]] = insertvalue { , , , , , } [[TMP7]], [[TMP8]], 3 +; RV32-NEXT: [[TMP10:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 4) +; RV32-NEXT: [[TMP11:%.*]] = insertvalue { , , , , , } [[TMP9]], [[TMP10]], 4 +; RV32-NEXT: [[TMP12:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 5) +; RV32-NEXT: [[TMP13:%.*]] = insertvalue { , , , , , } [[TMP11]], [[TMP12]], 5 +; RV32-NEXT: [[TMP14:%.*]] = extractvalue { , , , , , } [[TMP13]], 0 +; RV32-NEXT: [[TMP15:%.*]] = extractvalue { , , , , , } [[TMP13]], 1 +; RV32-NEXT: [[TMP16:%.*]] = extractvalue { , , , , , } [[TMP13]], 2 +; RV32-NEXT: [[TMP17:%.*]] = extractvalue { , , , , , } [[TMP13]], 3 +; RV32-NEXT: [[TMP18:%.*]] = extractvalue { , , , , , } [[TMP13]], 4 +; RV32-NEXT: [[TMP19:%.*]] = extractvalue { , , , , , } [[TMP13]], 5 +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor6_vscale( +; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t.p0.i64(target("riscv.vector.tuple", , 6) poison, ptr [[PTR:%.*]], i64 -1, i64 5) +; RV64-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 0) +; RV64-NEXT: [[TMP3:%.*]] = insertvalue { , , , , , } poison, [[TMP2]], 0 +; RV64-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 1) +; RV64-NEXT: [[TMP5:%.*]] = insertvalue { , , , , , } [[TMP3]], [[TMP4]], 1 +; RV64-NEXT: [[TMP6:%.*]] = call 
@llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 2) +; RV64-NEXT: [[TMP7:%.*]] = insertvalue { , , , , , } [[TMP5]], [[TMP6]], 2 +; RV64-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 3) +; RV64-NEXT: [[TMP9:%.*]] = insertvalue { , , , , , } [[TMP7]], [[TMP8]], 3 +; RV64-NEXT: [[TMP10:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 4) +; RV64-NEXT: [[TMP11:%.*]] = insertvalue { , , , , , } [[TMP9]], [[TMP10]], 4 +; RV64-NEXT: [[TMP12:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", , 6) [[TMP1]], i32 5) +; RV64-NEXT: [[TMP13:%.*]] = insertvalue { , , , , , } [[TMP11]], [[TMP12]], 5 +; RV64-NEXT: [[TMP14:%.*]] = extractvalue { , , , , , } [[TMP13]], 0 +; RV64-NEXT: [[TMP15:%.*]] = extractvalue { , , , , , } [[TMP13]], 1 +; RV64-NEXT: [[TMP16:%.*]] = extractvalue { , , , , , } [[TMP13]], 2 +; RV64-NEXT: [[TMP17:%.*]] = extractvalue { , , , , , } [[TMP13]], 3 +; RV64-NEXT: [[TMP18:%.*]] = extractvalue { , , , , , } [[TMP13]], 4 +; RV64-NEXT: [[TMP19:%.*]] = extractvalue { , , , , , } [[TMP13]], 5 +; RV64-NEXT: ret void +; + %interleaved.vec = load , ptr %ptr + %v = call { , , , , , } @llvm.vector.deinterleave6.nxv12i32( %interleaved.vec) + %t0 = extractvalue { , , , , , } %v, 0 + %t1 = extractvalue { , , , , , } %v, 1 + %t2 = extractvalue { , , , , , } %v, 2 + %t3 = extractvalue { , , , , , } %v, 3 + %t4 = extractvalue { , , , , , } %v, 4 + %t5 = extractvalue { , , , , , } %v, 5 + ret void +} + define void @load_factor7(ptr %ptr) { ; RV32-LABEL: @load_factor7( ; RV32-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg7.load.mask.v4i32.p0.i32(ptr [[PTR:%.*]], <4 x i1> splat (i1 true), i32 4) @@ -477,6 +576,77 @@ define void @load_factor8_vscale(ptr %ptr) { ; RV64-NEXT: [[TMP24:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 6 ; RV64-NEXT: [[TMP25:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 7 ; RV64-NEXT: ret void +; + %interleaved.vec = load , ptr %ptr + %v = call { , , , , , , , } @llvm.vector.deinterleave8.nxv16i32( %interleaved.vec) + %t0 = extractvalue { , , , , , , , } %v, 0 + %t1 = extractvalue { , , , , , , , } %v, 1 + %t2 = extractvalue { , , , , , , , } %v, 2 + %t3 = extractvalue { , , , , , , , } %v, 3 + %t4 = extractvalue { , , , , , , , } %v, 4 + %t5 = extractvalue { , , , , , , , } %v, 5 + %t6 = extractvalue { , , , , , , , } %v, 6 + %t7 = extractvalue { , , , , , , , } %v, 7 + ret void +} + +; TODO: Remove once recursive deinterleaving support is removed +define void @load_factor8_vscale_recursive(ptr %ptr) { +; RV32-LABEL: @load_factor8_vscale_recursive( +; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t.p0.i32(target("riscv.vector.tuple", , 8) poison, ptr [[PTR:%.*]], i32 -1, i32 5) +; RV32-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 0) +; RV32-NEXT: [[TMP3:%.*]] = insertvalue { , , , , , , , } poison, [[TMP2]], 0 +; RV32-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 1) +; RV32-NEXT: [[TMP5:%.*]] = insertvalue { , , , , , , , } [[TMP3]], [[TMP4]], 1 +; RV32-NEXT: 
[[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 2) +; RV32-NEXT: [[TMP7:%.*]] = insertvalue { , , , , , , , } [[TMP5]], [[TMP6]], 2 +; RV32-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 3) +; RV32-NEXT: [[TMP9:%.*]] = insertvalue { , , , , , , , } [[TMP7]], [[TMP8]], 3 +; RV32-NEXT: [[TMP10:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 4) +; RV32-NEXT: [[TMP11:%.*]] = insertvalue { , , , , , , , } [[TMP9]], [[TMP10]], 4 +; RV32-NEXT: [[TMP12:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 5) +; RV32-NEXT: [[TMP13:%.*]] = insertvalue { , , , , , , , } [[TMP11]], [[TMP12]], 5 +; RV32-NEXT: [[TMP14:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 6) +; RV32-NEXT: [[TMP15:%.*]] = insertvalue { , , , , , , , } [[TMP13]], [[TMP14]], 6 +; RV32-NEXT: [[TMP16:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 7) +; RV32-NEXT: [[TMP17:%.*]] = insertvalue { , , , , , , , } [[TMP15]], [[TMP16]], 7 +; RV32-NEXT: [[TMP18:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 0 +; RV32-NEXT: [[TMP19:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 1 +; RV32-NEXT: [[TMP20:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 2 +; RV32-NEXT: [[TMP21:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 3 +; RV32-NEXT: [[TMP22:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 4 +; RV32-NEXT: [[TMP23:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 5 +; RV32-NEXT: [[TMP24:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 6 +; RV32-NEXT: [[TMP25:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 7 +; RV32-NEXT: ret void +; +; RV64-LABEL: @load_factor8_vscale_recursive( +; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", , 8) poison, ptr [[PTR:%.*]], i64 -1, i64 5) +; RV64-NEXT: [[TMP2:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 0) +; RV64-NEXT: [[TMP3:%.*]] = insertvalue { , , , , , , , } poison, [[TMP2]], 0 +; RV64-NEXT: [[TMP4:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 1) +; RV64-NEXT: [[TMP5:%.*]] = insertvalue { , , , , , , , } [[TMP3]], [[TMP4]], 1 +; RV64-NEXT: [[TMP6:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 2) +; RV64-NEXT: [[TMP7:%.*]] = insertvalue { , , , , , , , } [[TMP5]], [[TMP6]], 2 +; RV64-NEXT: [[TMP8:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 3) +; RV64-NEXT: [[TMP9:%.*]] = insertvalue { , , , , , , , } [[TMP7]], [[TMP8]], 3 +; RV64-NEXT: [[TMP10:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 4) +; RV64-NEXT: [[TMP11:%.*]] = insertvalue { , , , , , , , } [[TMP9]], [[TMP10]], 4 +; RV64-NEXT: [[TMP12:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 5) +; 
RV64-NEXT: [[TMP13:%.*]] = insertvalue { , , , , , , , } [[TMP11]], [[TMP12]], 5 +; RV64-NEXT: [[TMP14:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 6) +; RV64-NEXT: [[TMP15:%.*]] = insertvalue { , , , , , , , } [[TMP13]], [[TMP14]], 6 +; RV64-NEXT: [[TMP16:%.*]] = call @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", , 8) [[TMP1]], i32 7) +; RV64-NEXT: [[TMP17:%.*]] = insertvalue { , , , , , , , } [[TMP15]], [[TMP16]], 7 +; RV64-NEXT: [[TMP18:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 0 +; RV64-NEXT: [[TMP19:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 1 +; RV64-NEXT: [[TMP20:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 2 +; RV64-NEXT: [[TMP21:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 3 +; RV64-NEXT: [[TMP22:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 4 +; RV64-NEXT: [[TMP23:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 5 +; RV64-NEXT: [[TMP24:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 6 +; RV64-NEXT: [[TMP25:%.*]] = extractvalue { , , , , , , , } [[TMP17]], 7 +; RV64-NEXT: ret void ; %interleaved.vec = load , ptr %ptr %d0 = call { , } @llvm.vector.deinterleave2.nxv16i32( %interleaved.vec) @@ -616,16 +786,39 @@ define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2 ret void } -define void @store_factor4_vscale(ptr %ptr, %v0, %v1) { +define void @store_factor4_vscale(ptr %ptr, %v0, %v1, %v2, %v3) { ; RV32-LABEL: @store_factor4_vscale( ; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) poison, [[V0:%.*]], i32 0) +; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP1]], [[V1:%.*]], i32 1) +; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP2]], [[V2:%.*]], i32 2) +; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP3]], [[V3:%.*]], i32 3) +; RV32-NEXT: call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i32(target("riscv.vector.tuple", , 4) [[TMP4]], ptr [[PTR:%.*]], i32 -1, i32 3) +; RV32-NEXT: ret void +; +; RV64-LABEL: @store_factor4_vscale( +; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) poison, [[V0:%.*]], i32 0) +; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP1]], [[V1:%.*]], i32 1) +; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP2]], [[V2:%.*]], i32 2) +; RV64-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP3]], [[V3:%.*]], i32 3) +; RV64-NEXT: call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", , 4) [[TMP4]], ptr [[PTR:%.*]], i64 -1, i64 3) +; RV64-NEXT: ret void +; + %interleaved.vec = call @llvm.vector.interleave4.nxv8i8( %v0, 
%v1, %v2, %v3) + store %interleaved.vec, ptr %ptr + ret void +} + +; TODO: Remove once recursive interleaving support is removed +define void @store_factor4_vscale_recursive(ptr %ptr, %v0, %v1) { +; RV32-LABEL: @store_factor4_vscale_recursive( +; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) poison, [[V0:%.*]], i32 0) ; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP1]], [[V0]], i32 1) ; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP2]], [[V1:%.*]], i32 2) ; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP3]], [[V1]], i32 3) ; RV32-NEXT: call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i32(target("riscv.vector.tuple", , 4) [[TMP4]], ptr [[PTR:%.*]], i32 -1, i32 3) ; RV32-NEXT: ret void ; -; RV64-LABEL: @store_factor4_vscale( +; RV64-LABEL: @store_factor4_vscale_recursive( ; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) poison, [[V0:%.*]], i32 0) ; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP1]], [[V0]], i32 1) ; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 4) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_4t.nxv8i8(target("riscv.vector.tuple", , 4) [[TMP2]], [[V1:%.*]], i32 2) @@ -736,6 +929,32 @@ define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32 ret void } +define void @store_factor6_vscale(ptr %ptr, %v0, %v1, %v2, %v3, %v4, %v5) { +; RV32-LABEL: @store_factor6_vscale( +; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) poison, [[V0:%.*]], i32 0) +; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP1]], [[V1:%.*]], i32 1) +; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP2]], [[V2:%.*]], i32 2) +; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP3]], [[V3:%.*]], i32 3) +; RV32-NEXT: [[TMP5:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP4]], [[V4:%.*]], i32 4) +; RV32-NEXT: [[TMP6:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP5]], [[V5:%.*]], i32 5) +; RV32-NEXT: call void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv8i8_6t.p0.i32(target("riscv.vector.tuple", , 6) [[TMP6]], ptr [[PTR:%.*]], i32 -1, i32 3) +; RV32-NEXT: ret void +; +; RV64-LABEL: @store_factor6_vscale( +; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 6) 
@llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) poison, [[V0:%.*]], i32 0) +; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP1]], [[V1:%.*]], i32 1) +; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP2]], [[V2:%.*]], i32 2) +; RV64-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP3]], [[V3:%.*]], i32 3) +; RV64-NEXT: [[TMP5:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP4]], [[V4:%.*]], i32 4) +; RV64-NEXT: [[TMP6:%.*]] = call target("riscv.vector.tuple", , 6) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_6t.nxv8i8(target("riscv.vector.tuple", , 6) [[TMP5]], [[V5:%.*]], i32 5) +; RV64-NEXT: call void @llvm.riscv.vsseg6.triscv.vector.tuple_nxv8i8_6t.p0.i64(target("riscv.vector.tuple", , 6) [[TMP6]], ptr [[PTR:%.*]], i64 -1, i64 3) +; RV64-NEXT: ret void +; + %interleaved.vec = call @llvm.vector.interleave6.nxv8i8( %v0, %v1, %v2, %v3, %v4, %v5) + store %interleaved.vec, ptr %ptr + ret void +} + define void @store_factor7_vscale(ptr %ptr, %v0, %v1, %v2, %v3, %v4, %v5, %v6) { ; RV32-LABEL: @store_factor7_vscale( ; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv8i8(target("riscv.vector.tuple", , 7) poison, [[V0:%.*]], i32 0) @@ -764,9 +983,40 @@ define void @store_factor7_vscale(ptr %ptr, %v0, %v0, %v1, %v2, %v3) { +define void @store_factor8_vscale(ptr %ptr, %v0, %v1, %v2, %v3, %v4, %v5, %v6, %v7) { ; RV32-LABEL: @store_factor8_vscale( ; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0) +; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP1]], [[V1:%.*]], i32 1) +; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP2]], [[V2:%.*]], i32 2) +; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP3]], [[V3:%.*]], i32 3) +; RV32-NEXT: [[TMP5:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP4]], [[V4:%.*]], i32 4) +; RV32-NEXT: [[TMP6:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP5]], [[V5:%.*]], i32 5) +; RV32-NEXT: [[TMP7:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP6]], [[V6:%.*]], i32 6) +; RV32-NEXT: [[TMP8:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP7]], [[V7:%.*]], i32 7) +; RV32-NEXT: call void 
@llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i32(target("riscv.vector.tuple", , 8) [[TMP8]], ptr [[PTR:%.*]], i32 -1, i32 3) +; RV32-NEXT: ret void +; +; RV64-LABEL: @store_factor8_vscale( +; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0) +; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP1]], [[V1:%.*]], i32 1) +; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP2]], [[V2:%.*]], i32 2) +; RV64-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP3]], [[V3:%.*]], i32 3) +; RV64-NEXT: [[TMP5:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP4]], [[V4:%.*]], i32 4) +; RV64-NEXT: [[TMP6:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP5]], [[V5:%.*]], i32 5) +; RV64-NEXT: [[TMP7:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP6]], [[V6:%.*]], i32 6) +; RV64-NEXT: [[TMP8:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP7]], [[V7:%.*]], i32 7) +; RV64-NEXT: call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", , 8) [[TMP8]], ptr [[PTR:%.*]], i64 -1, i64 3) +; RV64-NEXT: ret void +; + %interleaved.vec = call @llvm.vector.interleave8.nxv8i8( %v0, %v1, %v2, %v3, %v4, %v5, %v6, %v7) + store %interleaved.vec, ptr %ptr + ret void +} + +; TODO: Remove once recursive interleaving support is removed +define void @store_factor8_vscale_recursive(ptr %ptr, %v0, %v1, %v2, %v3) { +; RV32-LABEL: @store_factor8_vscale_recursive( +; RV32-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0) ; RV32-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP1]], [[V2:%.*]], i32 1) ; RV32-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP2]], [[V0]], i32 2) ; RV32-NEXT: [[TMP4:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP3]], [[V2]], i32 3) @@ -777,7 +1027,7 @@ define void @store_factor8_vscale(ptr %ptr, %v0, , 8) [[TMP8]], ptr [[PTR:%.*]], i32 -1, i32 3) ; RV32-NEXT: ret void ; -; RV64-LABEL: @store_factor8_vscale( +; RV64-LABEL: @store_factor8_vscale_recursive( ; RV64-NEXT: [[TMP1:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) poison, [[V0:%.*]], i32 0) ; RV64-NEXT: [[TMP2:%.*]] = call target("riscv.vector.tuple", , 8) 
@llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP1]], [[V2:%.*]], i32 1) ; RV64-NEXT: [[TMP3:%.*]] = call target("riscv.vector.tuple", , 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv8i8(target("riscv.vector.tuple", , 8) [[TMP2]], [[V0]], i32 2)
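
For reference (not part of the patch; the function names below are illustrative only), a minimal fixed-vector sketch of the two factor-4 store shapes the pass now handles. The flat @llvm.vector.interleave4 form receives its operands in memory order, while the recursive @llvm.vector.interleave2 tree, which the loop vectorizer still emits for factors 4 and 8, yields its leaves as (a, c, b, d) during the breadth-first walk in getVectorInterleaveFactor; that is why interleaveLeafValues is still invoked for power-of-2 interleave2 trees:

; Flat form: operands already in memory order a, b, c, d.
define void @store_factor4_flat(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
  %v = call <16 x i32> @llvm.vector.interleave4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d)
  store <16 x i32> %v, ptr %p
  ret void
}

; Recursive form: a tree of interleave2 calls. The breadth-first walk collects
; the leaves as a, c, b, d, so they are reordered back to a, b, c, d before the
; group is handed to the target hook for lowering to a segmented store.
define void @store_factor4_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
  %lo = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c)
  %hi = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d)
  %v = call <16 x i32> @llvm.vector.interleave2.v16i32(<8 x i32> %lo, <8 x i32> %hi)
  store <16 x i32> %v, ptr %p
  ret void
}

Both functions store the same a0 b0 c0 d0 a1 b1 c1 d1 ... layout, so both lower to a vsseg4e32 segmented store on RISC-V, matching the vector_interleave_store_factor4 tests above.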