@@ -744,40 +744,39 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
744
744
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
745
745
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
746
746
; CHECK: [[VECTOR_BODY]]:
747
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6 :.*]] ]
748
- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6 ]] ]
747
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6 :.*]] ]
748
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6 ]] ]
749
749
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
750
750
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
751
751
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
752
- ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
753
- ; CHECK: [[PRED_STORE_IF]]:
754
- ; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
755
- ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
756
- ; CHECK: [[PRED_STORE_CONTINUE]]:
752
+ ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
753
+ ; CHECK: [[PRED_SREM_IF]]:
754
+ ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]]
755
+ ; CHECK: [[PRED_SREM_CONTINUE]]:
757
756
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
758
- ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
759
- ; CHECK: [[PRED_STORE_IF1]]:
760
- ; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
761
- ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
762
- ; CHECK: [[PRED_STORE_CONTINUE2]]:
757
+ ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
758
+ ; CHECK: [[PRED_SREM_IF1]]:
759
+ ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]]
760
+ ; CHECK: [[PRED_SREM_CONTINUE2]]:
763
761
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
764
- ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
765
- ; CHECK: [[PRED_STORE_IF3]]:
766
- ; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
767
- ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
768
- ; CHECK: [[PRED_STORE_CONTINUE4]]:
762
+ ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]]
763
+ ; CHECK: [[PRED_SREM_IF3]]:
764
+ ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]]
765
+ ; CHECK: [[PRED_SREM_CONTINUE4]]:
769
766
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
770
- ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]]
771
- ; CHECK: [[PRED_STORE_IF5]]:
772
- ; CHECK-NEXT: store i64 poison, ptr [[AUX]], align 8
773
- ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
774
- ; CHECK: [[PRED_STORE_CONTINUE6]]:
767
+ ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]]
768
+ ; CHECK: [[PRED_SREM_IF5]]:
769
+ ; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]]
770
+ ; CHECK: [[PRED_SREM_CONTINUE6]]:
775
771
; CHECK-NEXT: [[TMP6:%.*]] = add i64 poison, -3
776
772
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[TMP6]]
777
773
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]]
778
774
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
779
775
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
780
776
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
777
+ ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison
778
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3
779
+ ; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8
781
780
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
782
781
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0
783
782
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4
@@ -797,7 +796,6 @@ loop.header:
797
796
798
797
then:
799
798
%rem = srem i64 3 , 0
800
- store i64 %rem , ptr %aux
801
799
%add3 = add i64 %rem , -3
802
800
%add5 = add i64 %iv , %add3
803
801
%gep = getelementptr [5 x i8 ], ptr @c , i64 0 , i64 %add5
@@ -806,6 +804,8 @@ then:
806
804
807
805
loop.latch:
808
806
%sr = phi i8 [ 0 , %loop.header ], [ %l , %then ]
807
+ %p = phi i64 [ 0 , %loop.header ], [ %rem , %then ]
808
+ store i64 %p , ptr %aux
809
809
%gep.dst = getelementptr i8 , ptr %dst , i64 %iv
810
810
store i8 %sr , ptr %gep.dst , align 4
811
811
%inc = add i64 %iv , 1
0 commit comments