diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b25cb128bce9f..32a94f3dee22c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12430,7 +12430,11 @@ RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
   }
 
-  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
+  // Don't use EVL or Mask for vid so it can be hoisted out of loops.
+  auto [TrueMask, VLMAX] =
+      getDefaultScalableVLOps(IndicesVT, DL, DAG, Subtarget);
+  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, TrueMask, VLMAX);
+
   SDValue VecLen =
       DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
   SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll
index 136f6e7bc9990..887edafe9c88a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll
@@ -5,10 +5,10 @@
 define <2 x double> @test_vp_reverse_v2f64_masked(<2 x double> %src, <2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v2f64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -34,10 +34,10 @@ define <2 x double> @test_vp_reverse_v2f64(<2 x double> %src, i32 zeroext %evl)
 define <4 x float> @test_vp_reverse_v4f32_masked(<4 x float> %src, <4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v4f32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
index b235990ab5dd0..194eb222be01f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
@@ -4,10 +4,10 @@
 define <vscale x 1 x double> @test_vp_reverse_nxv1f64_masked(<vscale x 1 x double> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv1f64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -33,10 +33,10 @@ define <vscale x 1 x double> @test_vp_reverse_nxv1f64(<vscale x 1 x double> %src
 define <vscale x 2 x float> @test_vp_reverse_nxv2f32_masked(<vscale x 2 x float> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv2f32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -62,10 +62,10 @@ define <vscale x 2 x float> @test_vp_reverse_nxv2f32(<vscale x 2 x float> %src,
 define <vscale x 2 x double> @test_vp_reverse_nxv2f64_masked(<vscale x 2 x double> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv2f64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -91,10 +91,10 @@ define <vscale x 2 x double> @test_vp_reverse_nxv2f64(<vscale x 2 x double> %src
 define <vscale x 4 x float> @test_vp_reverse_nxv4f32_masked(<vscale x 4 x float> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv4f32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -120,10 +120,10 @@ define <vscale x 4 x float> @test_vp_reverse_nxv4f32(<vscale x 4 x float> %src,
 define <vscale x 4 x double> @test_vp_reverse_nxv4f64_masked(<vscale x 4 x double> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv4f64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -149,10 +149,10 @@ define <vscale x 4 x double> @test_vp_reverse_nxv4f64(<vscale x 4 x double> %src
 define <vscale x 8 x float> @test_vp_reverse_nxv8f32_masked(<vscale x 8 x float> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8f32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -178,10 +178,10 @@ define <vscale x 8 x float> @test_vp_reverse_nxv8f32(<vscale x 8 x float> %src,
 define <vscale x 8 x double> @test_vp_reverse_nxv8f64_masked(<vscale x 8 x double> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8f64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
@@ -207,10 +207,10 @@ define <vscale x 8 x double> @test_vp_reverse_nxv8f64(<vscale x 8 x double> %src
 define <vscale x 16 x float> @test_vp_reverse_nxv16f32_masked(<vscale x 16 x float> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv16f32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll
index 27f16f0285e12..33fa3539ade93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll
@@ -5,10 +5,10 @@
 define <2 x i64> @test_vp_reverse_v2i64_masked(<2 x i64> %src, <2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v2i64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -34,10 +34,10 @@ define <2 x i64> @test_vp_reverse_v2i64(<2 x i64> %src, i32 zeroext %evl) {
 define <4 x i32> @test_vp_reverse_v4i32_masked(<4 x i32> %src, <4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v4i32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -63,10 +63,10 @@ define <4 x i32> @test_vp_reverse_v4i32(<4 x i32> %src, i32 zeroext %evl) {
 define <8 x i16> @test_vp_reverse_v8i16_masked(<8 x i16> %src, <8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v8i16_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -92,10 +92,10 @@ define <8 x i16> @test_vp_reverse_v8i16(<8 x i16> %src, i32 zeroext %evl) {
 define <16 x i8> @test_vp_reverse_v16i8_masked(<16 x i8> %src, <16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v16i8_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v10, v10, a1, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll
index 507f5154cf1ac..ab37e5f27bcef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll
@@ -4,10 +4,10 @@
 define <vscale x 1 x i64> @test_vp_reverse_nxv1i64_masked(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv1i64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -33,10 +33,10 @@ define <vscale x 1 x i64> @test_vp_reverse_nxv1i64(<vscale x 1 x i64> %src, i32
 define <vscale x 2 x i32> @test_vp_reverse_nxv2i32_masked(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv2i32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -62,10 +62,10 @@ define <vscale x 2 x i32> @test_vp_reverse_nxv2i32(<vscale x 2 x i32> %src, i32
 define <vscale x 4 x i16> @test_vp_reverse_nxv4i16_masked(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv4i16_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vrsub.vx v10, v9, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
@@ -91,10 +91,10 @@ define <vscale x 4 x i16> @test_vp_reverse_nxv4i16(<vscale x 4 x i16> %src, i32
 define <vscale x 8 x i8> @test_vp_reverse_nxv8i8_masked(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8i8_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v10, v10, a1, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v9, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v9
@@ -122,10 +122,10 @@ define <vscale x 8 x i8> @test_vp_reverse_nxv8i8(<vscale x 8 x i8> %src, i32 zer
 define <vscale x 2 x i64> @test_vp_reverse_nxv2i64_masked(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv2i64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -151,10 +151,10 @@ define <vscale x 2 x i64> @test_vp_reverse_nxv2i64(<vscale x 2 x i64> %src, i32
 define <vscale x 4 x i32> @test_vp_reverse_nxv4i32_masked(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv4i32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -180,10 +180,10 @@ define <vscale x 4 x i32> @test_vp_reverse_nxv4i32(<vscale x 4 x i32> %src, i32
 define <vscale x 8 x i16> @test_vp_reverse_nxv8i16_masked(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8i16_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vrsub.vx v12, v10, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -209,10 +209,10 @@ define <vscale x 8 x i16> @test_vp_reverse_nxv8i16(<vscale x 8 x i16> %src, i32
 define <vscale x 16 x i8> @test_vp_reverse_nxv16i8_masked(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv16i8_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v12, v12, a1, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v10, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
@@ -240,10 +240,10 @@ define <vscale x 16 x i8> @test_vp_reverse_nxv16i8(<vscale x 16 x i8> %src, i32
 define <vscale x 4 x i64> @test_vp_reverse_nxv4i64_masked(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv4i64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -269,10 +269,10 @@ define <vscale x 4 x i64> @test_vp_reverse_nxv4i64(<vscale x 4 x i64> %src, i32
 define <vscale x 8 x i32> @test_vp_reverse_nxv8i32_masked(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8i32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -298,10 +298,10 @@ define <vscale x 8 x i32> @test_vp_reverse_nxv8i32(<vscale x 8 x i32> %src, i32
 define <vscale x 16 x i16> @test_vp_reverse_nxv16i16_masked(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv16i16_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vrsub.vx v16, v12, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -327,10 +327,10 @@ define <vscale x 16 x i16> @test_vp_reverse_nxv16i16(<vscale x 16 x i16> %src, i
 define <vscale x 32 x i8> @test_vp_reverse_nxv32i8_masked(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv32i8_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v16, v16, a1, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vrgatherei16.vv v12, v8, v16, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
@@ -358,10 +358,10 @@ define <vscale x 32 x i8> @test_vp_reverse_nxv32i8(<vscale x 32 x i8> %src, i32
 define <vscale x 8 x i64> @test_vp_reverse_nxv8i64_masked(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8i64_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
@@ -387,10 +387,10 @@ define <vscale x 8 x i64> @test_vp_reverse_nxv8i64(<vscale x 8 x i64> %src, i32
 define <vscale x 16 x i32> @test_vp_reverse_nxv16i32_masked(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv16i32_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
@@ -416,10 +416,10 @@ define <vscale x 16 x i32> @test_vp_reverse_nxv16i32(<vscale x 16 x i32> %src, i
 define <vscale x 32 x i16> @test_vp_reverse_nxv32i16_masked(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv32i16_masked:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, a0, -1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vrsub.vx v24, v16, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v16, v8, v24, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll
new file mode 100644
index 0000000000000..54f9670e62460
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+; Test that we hoist the vid.v out of the loop by not using the EVL for AVL.
+
+define void @reverse(ptr %p) {
+; CHECK-LABEL: reverse:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    vsetvli a3, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:  .LBB0_1: # %loop
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sub a3, a2, a1
+; CHECK-NEXT:    slli a4, a1, 3
+; CHECK-NEXT:    vsetvli a3, a3, e64, m1, ta, ma
+; CHECK-NEXT:    add a4, a0, a4
+; CHECK-NEXT:    vle64.v v9, (a4)
+; CHECK-NEXT:    addi a5, a3, -1
+; CHECK-NEXT:    vrsub.vx v10, v8, a5
+; CHECK-NEXT:    vrgather.vv v11, v9, v10
+; CHECK-NEXT:    add a1, a1, a3
+; CHECK-NEXT:    vse64.v v11, (a4)
+; CHECK-NEXT:    bltu a1, a2, .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %exit
+; CHECK-NEXT:    ret
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+
+  %cnt = sub i64 1024, %iv
+  %evl = call i32 @llvm.experimental.get.vector.length(i64 %cnt, i32 1, i1 true)
+
+  %p.gep = getelementptr i64, ptr %p, i64 %iv
+  %v = call <vscale x 1 x i64> @llvm.vp.load(ptr %p.gep, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  %w = call <vscale x 1 x i64> @llvm.experimental.vp.reverse(<vscale x 1 x i64> %v, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  call void @llvm.vp.store(<vscale x 1 x i64> %w, ptr %p.gep, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  %evl.zext = zext i32 %evl to i64
+  %iv.next = add i64 %iv, %evl.zext
+  %done = icmp uge i64 %iv.next, 1024
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll
index 09d92c3c039f9..f07720698cfe1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll
@@ -5,15 +5,17 @@
 define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v2i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v11, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -44,15 +46,17 @@ define <2 x i1> @test_vp_reverse_v2i1(<2 x i1> %src, i32 zeroext %evl) {
 define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v4i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v11, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -83,15 +87,17 @@ define <4 x i1> @test_vp_reverse_v4i1(<4 x i1> %src, i32 zeroext %evl) {
 define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v8i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v11, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -122,15 +128,17 @@ define <8 x i1> @test_vp_reverse_v8i1(<8 x i1> %src, i32 zeroext %evl) {
 define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_v16i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v12, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v12, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0, v0.t
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
index 8e44d76e7010f..c0b0d3d8d0717 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
@@ -4,15 +4,17 @@
 define <vscale x 1 x i1> @test_vp_reverse_nxv1i1_masked(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv1i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v11, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -43,15 +45,17 @@ define <vscale x 1 x i1> @test_vp_reverse_nxv1i1(<vscale x 1 x i1> %src, i32 zer
 define <vscale x 2 x i1> @test_vp_reverse_nxv2i1_masked(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv2i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v11, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -82,15 +86,17 @@ define <vscale x 2 x i1> @test_vp_reverse_nxv2i1(<vscale x 2 x i1> %src, i32 zer
 define <vscale x 4 x i1> @test_vp_reverse_nxv4i1_masked(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv4i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v11, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v11, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -121,15 +127,17 @@ define <vscale x 4 x i1> @test_vp_reverse_nxv4i1(<vscale x 4 x i1> %src, i32 zer
 define <vscale x 8 x i1> @test_vp_reverse_nxv8i1_masked(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv8i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vx v10, v10, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v9, v12, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v12, v9, v10, v0.t
 ; CHECK-NEXT:    vmsne.vi v0, v12, 0, v0.t
 ; CHECK-NEXT:    ret
@@ -160,15 +168,17 @@ define <vscale x 8 x i1> @test_vp_reverse_nxv8i1(<vscale x 8 x i1> %src, i32 zer
 define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv16i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT:    vid.v v12, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v12
 ; CHECK-NEXT:    vrsub.vx v12, v12, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v16, v10, v12, v0.t
 ; CHECK-NEXT:    vmsne.vi v8, v16, 0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8
@@ -200,15 +210,17 @@ define <vscale x 16 x i1> @test_vp_reverse_nxv16i1(<vscale x 16 x i1> %src, i32
 define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_vp_reverse_nxv32i1_masked:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v12, 0
-; CHECK-NEXT:    addi a0, a0, -1
-; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
-; CHECK-NEXT:    vid.v v16, v0.t
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vid.v v16
 ; CHECK-NEXT:    vrsub.vx v16, v16, a0, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vrgatherei16.vv v24, v12, v16, v0.t
 ; CHECK-NEXT:    vmsne.vi v8, v24, 0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v8