Closed
Description
/* Reproducer: copies data1[i] into data2[i] for every i in [0, size).
 * The loop body does not execute when size <= 0. */
void testWhileWR(int *data1, int *data2, int size) {
for (int i = 0; i < size; i++) {
data2[i] = data1[i];
}
}
- The IR node `%6` is hoisted out of `vector.body` in pass LCSSAPass, so it ends up in a different basic block from the node `%index.next`; as a result, the two can't be folded together in combine.
; Loop block of the vectorized copy. It branches back to itself for as long as
; lane 0 of the mask computed for the next iteration is set.
vector.body: ; preds = %vector.body, %vector.ph
; %index is 0 when entered from %vector.ph and %index.next on the back edge.
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; Lane mask for this iteration: the entry mask first, then %active.lane.mask.next.
%active.lane.mask = phi <vscale x 4 x i1> [ %active.lane.mask.entry, %vector.ph ], [ %active.lane.mask.next, %vector.body ]
; Masked load of a <vscale x 4 x i32> vector from &data1[index].
%3 = getelementptr inbounds i32, ptr %data1, i64 %index
%wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %3, i32 4, <vscale x 4 x i1> %active.lane.mask, <vscale x 4 x i32> poison), !tbaa !6
; Masked store of the loaded vector to &data2[index], under the same mask.
%4 = getelementptr inbounds i32, ptr %data2, i64 %index
call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %wide.masked.load, ptr %4, i32 4, <vscale x 4 x i1> %active.lane.mask), !tbaa !6
; Index step: %6 = vscale << 2. Neither %5 nor %6 uses any value defined in
; this block, which is what makes %6 a candidate for hoisting out of the loop.
%5 = call i64 @llvm.vscale.i64()
%6 = shl nuw nsw i64 %5, 2
%index.next = add i64 %index, %6
; Mask for the next iteration, derived from %index.next and %wide.trip.count.
%active.lane.mask.next = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %index.next, i64 %wide.trip.count)
; Continue looping only if lane 0 of the next mask is set; otherwise exit.
%7 = extractelement <vscale x 4 x i1> %active.lane.mask.next, i64 0
br i1 %7, label %vector.body, label %for.cond.cleanup.loopexit12, !llvm.loop !10