Skip to content

[IA] Add support for [de]interleave{4,6,8} #141512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 37 additions & 9 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,12 +579,21 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
case Intrinsic::vector_deinterleave3:
case Intrinsic::vector_interleave3:
return 3;
case Intrinsic::vector_deinterleave4:
case Intrinsic::vector_interleave4:
return 4;
case Intrinsic::vector_deinterleave5:
case Intrinsic::vector_interleave5:
return 5;
case Intrinsic::vector_deinterleave6:
case Intrinsic::vector_interleave6:
return 6;
case Intrinsic::vector_deinterleave7:
case Intrinsic::vector_interleave7:
return 7;
case Intrinsic::vector_deinterleave8:
case Intrinsic::vector_interleave8:
return 8;
default:
llvm_unreachable("Unexpected intrinsic");
}
Expand All @@ -605,10 +614,9 @@ static unsigned getIntrinsicFactor(const IntrinsicInst *II) {
// to reorder them by interleaving these values.
static void interleaveLeafValues(MutableArrayRef<Value *> SubLeaves) {
unsigned NumLeaves = SubLeaves.size();
if (NumLeaves == 2 || !isPowerOf2_64(NumLeaves))
return;

assert(isPowerOf2_32(NumLeaves) && NumLeaves > 1);
if (NumLeaves == 2)
return;

const unsigned HalfLeaves = NumLeaves / 2;
// Visit the sub-trees.
Expand All @@ -629,8 +637,11 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
SmallVectorImpl<Instruction *> &DeadInsts) {
assert(II->getIntrinsicID() == Intrinsic::vector_interleave2 ||
II->getIntrinsicID() == Intrinsic::vector_interleave3 ||
II->getIntrinsicID() == Intrinsic::vector_interleave4 ||
II->getIntrinsicID() == Intrinsic::vector_interleave5 ||
II->getIntrinsicID() == Intrinsic::vector_interleave7);
II->getIntrinsicID() == Intrinsic::vector_interleave6 ||
II->getIntrinsicID() == Intrinsic::vector_interleave7 ||
II->getIntrinsicID() == Intrinsic::vector_interleave8);

// Visit with BFS
SmallVector<IntrinsicInst *, 8> Queue;
Expand Down Expand Up @@ -660,13 +671,17 @@ getVectorInterleaveFactor(IntrinsicInst *II, SmallVectorImpl<Value *> &Operands,
}

const unsigned Factor = Operands.size();
// Currently we only recognize factors of 3, 5, 7, and powers of 2.
// Currently we only recognize factors 2...8 and other powers of 2.
// FIXME: should we assert here instead?
if (Factor <= 1 ||
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
return false;

interleaveLeafValues(Operands);
// Recursively interleaved factors need to have their values reordered
// TODO: Remove once the loop vectorizer no longer recursively interleaves
// factors 4 + 8
if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
interleaveLeafValues(Operands);
return true;
}

Expand All @@ -676,8 +691,11 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
SmallVectorImpl<Instruction *> &DeadInsts) {
assert(II->getIntrinsicID() == Intrinsic::vector_deinterleave2 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave3 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave4 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave5 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave7);
II->getIntrinsicID() == Intrinsic::vector_deinterleave6 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave7 ||
II->getIntrinsicID() == Intrinsic::vector_deinterleave8);
using namespace PatternMatch;
if (!II->hasNUses(getIntrinsicFactor(II)))
return false;
Expand Down Expand Up @@ -737,13 +755,17 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
}

const unsigned Factor = Results.size();
// Currently we only recognize factors of 3, 5, 7, and powers of 2.
// Currently we only recognize factors of 2...8 and other powers of 2.
// FIXME: should we assert here instead?
if (Factor <= 1 ||
(!isPowerOf2_32(Factor) && Factor != getIntrinsicFactor(II)))
return 0;

interleaveLeafValues(Results);
// Recursively interleaved factors need to have their values reordered
// TODO: Remove once the loop vectorizer no longer recursively interleaves
// factors 4 + 8
if (isPowerOf2_32(Factor) && getIntrinsicFactor(II) == 2)
interleaveLeafValues(Results);
return true;
}

Expand Down Expand Up @@ -907,14 +929,20 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
switch (II->getIntrinsicID()) {
case Intrinsic::vector_deinterleave2:
case Intrinsic::vector_deinterleave3:
case Intrinsic::vector_deinterleave4:
case Intrinsic::vector_deinterleave5:
case Intrinsic::vector_deinterleave6:
case Intrinsic::vector_deinterleave7:
case Intrinsic::vector_deinterleave8:
Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
break;
case Intrinsic::vector_interleave2:
case Intrinsic::vector_interleave3:
case Intrinsic::vector_interleave4:
case Intrinsic::vector_interleave5:
case Intrinsic::vector_interleave6:
case Intrinsic::vector_interleave7:
case Intrinsic::vector_interleave8:
Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
break;
default:
Expand Down
78 changes: 75 additions & 3 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,26 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_fact
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
%vec = load <32 x i8>, ptr %p
%d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave4(<32 x i8> %vec)
%t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
%t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
%t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
%t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
}

; TODO: Remove once recursive deinterleaving support is removed
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4_recursive(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
%vec = load <32 x i8>, ptr %p
%d0 = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec)
Expand Down Expand Up @@ -319,6 +339,29 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4
}

define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor6(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
%vec = load <48 x i8>, ptr %p
%d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave6(<48 x i8> %vec)
%t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
%t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
%t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
%t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
%t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
%t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
%res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5
}

define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor7(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor7:
; CHECK: # %bb.0:
Expand All @@ -339,14 +382,43 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
%res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t5, 5
%res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t6, 6
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
%res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6
}

define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) {
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor8(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: vmv1r.v v15, v14
; CHECK-NEXT: ret
%vec = load <64 x i8>, ptr %p
%d0 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.vector.deinterleave8(<64 x i8> %vec)
%t0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 0
%t1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 1
%t2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 2
%t3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 3
%t4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 4
%t5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 5
%t6 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 6
%t7 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %d0, 7
%res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
%res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
%res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
%res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
%res4 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3, <8 x i8> %t4, 4
%res5 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res4, <8 x i8> %t5, 5
%res6 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res5, <8 x i8> %t6, 6
%res7 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res6, <8 x i8> %t6, 7
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res7
}

; TODO: Remove once recursive deinterleaving support is removed
define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8_recursive(ptr %ptr) {
; CHECK-LABEL: vector_deinterleave_load_factor8_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
Expand Down
35 changes: 35 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,18 @@ define void @vector_interleave_store_factor4(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v8, (a0)
; CHECK-NEXT: ret
%v = call <16 x i32> @llvm.vector.interleave4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d)
store <16 x i32> %v, ptr %p
ret void
}

; TODO: Remove once recursive interleaving support is removed
define void @vector_interleave_store_factor4_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor4_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg4e32.v v8, (a0)
; CHECK-NEXT: ret
%v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %c)
%v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %b, <4 x i32> %d)
Expand All @@ -216,6 +228,17 @@ define void @vector_interleave_store_factor5(<4 x i32> %a, <4 x i32> %b, <4 x i3
ret void
}

define void @vector_interleave_store_factor6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg6e32.v v8, (a0)
; CHECK-NEXT: ret
%v = call <24 x i32> @llvm.vector.interleave6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f)
store <24 x i32> %v, ptr %p
ret void
}

define void @vector_interleave_store_factor7(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor7:
; CHECK: # %bb.0:
Expand All @@ -232,6 +255,18 @@ define void @vector_interleave_store_factor8(<4 x i32> %a, <4 x i32> %b, <4 x i3
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg8e32.v v8, (a0)
; CHECK-NEXT: ret
%v = call <32 x i32> @llvm.vector.interleave8(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h)
store <32 x i32> %v, ptr %p
ret void
}

; TODO: Remove once recursive interleaving support is removed
define void @vector_interleave_store_factor8_recursive(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d, <4 x i32> %e, <4 x i32> %f, <4 x i32> %g, <4 x i32> %h, ptr %p) {
; CHECK-LABEL: vector_interleave_store_factor8_recursive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsseg8e32.v v8, (a0)
; CHECK-NEXT: ret
%v0 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %e)
%v1 = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %c, <4 x i32> %g)
Expand Down
Loading