diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 976c65e51c205..be897bb257796 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1143,27 +1143,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
   if (MI.getNumDefs() != 1)
     return false;
 
-  // If we're not using VLMAX, then we need to be careful whether we are using
-  // TA/TU when there is a non-undef Passthru. But when we are using VLMAX, it
-  // does not matter whether we are using TA/TU with a non-undef Passthru, since
-  // there are no tail elements to be preserved.
   unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
   const MachineOperand &VLOp = MI.getOperand(VLOpNum);
-  if (VLOp.isReg() || VLOp.getImm() != RISCV::VLMaxSentinel) {
-    // If MI has a non-undef passthru, we will not try to optimize it since
-    // that requires us to preserve tail elements according to TA/TU.
-    // Otherwise, The MI has an undef Passthru, so it doesn't matter whether we
-    // are using TA/TU.
-    bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(Desc);
-    unsigned PassthruOpIdx = MI.getNumExplicitDefs();
-    if (HasPassthru &&
-        MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister) {
-      LLVM_DEBUG(
-          dbgs() << "  Not a candidate because it uses non-undef passthru"
-                    " with non-VLMAX VL\n");
-      return false;
-    }
-  }
 
   // If the VL is 1, then there is no need to reduce it. This is an
   // optimization, not needed to preserve correctness.
@@ -1247,7 +1228,7 @@ std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
       return std::nullopt;
     }
 
-    // Tied operands might pass through.
+    // If used as a passthru, elements past VL will be read.
     if (UserOp.isTied()) {
       LLVM_DEBUG(dbgs() << "    Abort because user used as tied operand\n");
       return std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 49db94e1a02df..9dbe261b7cd05 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -3919,11 +3919,12 @@ define void @trunc_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v0, v8, fa5
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -4002,11 +4003,12 @@ define void @trunc_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfabs.v v8, v10
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -4098,12 +4100,13 @@ define void @ceil_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v0, v8, fa5
 ; CHECK-NEXT:    fsrmi a1, 3
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -4189,12 +4192,13 @@ define void @ceil_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vfabs.v v8, v10
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT:    fsrmi a1, 3
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a1
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -4290,12 +4294,13 @@ define void @floor_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v0, v8, fa5
 ; CHECK-NEXT:    fsrmi a1, 2
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -4381,12 +4386,13 @@ define void @floor_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vfabs.v v8, v10
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT:    fsrmi a1, 2
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a1
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
@@ -4482,12 +4488,13 @@ define void @round_v6bf16(ptr %x) {
 ; CHECK-NEXT:    vfabs.v v8, v10
 ; CHECK-NEXT:    vmflt.vf v0, v8, fa5
 ; CHECK-NEXT:    fsrmi a1, 4
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; CHECK-NEXT:    fsrm a1
 ; CHECK-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -4573,12 +4580,13 @@ define void @round_v6f16(ptr %x) {
 ; ZVFHMIN-NEXT:    vfabs.v v8, v10
 ; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
 ; ZVFHMIN-NEXT:    fsrmi a1, 4
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
 ; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
 ; ZVFHMIN-NEXT:    fsrm a1
 ; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 1cc30f077feb4..3e49da014d56f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -107,7 +107,8 @@ define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale
   ret <vscale x 4 x i32> %w
 }
 
-; Test case to make sure VL won't propgate if using tail-undisturbed policy.
+; We can propagate VL to a tail-undisturbed policy, provided none of its users
+; are passthrus (i.e. read past VL).
 define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
 ; CHECK-LABEL: different_vl_with_tu:
 ; CHECK:       # %bb.0:
@@ -118,22 +119,65 @@ define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale
 ; CHECK-NEXT:    vadd.vv v8, v14, v10
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
-  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
+  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl2)
   ret <vscale x 4 x i32> %w
 }
 
-; Test case to make sure VL won't propgate if using tail-undisturbed policy.
+; We can propagate VL to a tail-undisturbed policy, provided none of its users
+; are passthrus (i.e. read past VL).
 define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
-; CHECK-LABEL: different_imm_vl_with_tu:
+; NOVLOPT-LABEL: different_imm_vl_with_tu:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vmv2r.v v14, v10
+; NOVLOPT-NEXT:    vadd.vv v14, v10, v12
+; NOVLOPT-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v14, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: different_imm_vl_with_tu:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
+; VLOPT-NEXT:    vmv2r.v v14, v10
+; VLOPT-NEXT:    vadd.vv v14, v10, v12
+; VLOPT-NEXT:    vadd.vv v8, v14, v10
+; VLOPT-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
+  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
+  ret <vscale x 4 x i32> %w
+}
+
+; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
+; are demanded.
+define <vscale x 4 x i32> @different_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_vl_as_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vadd.vv v12, v8, v10
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
+; CHECK-NEXT:    vadd.vv v12, v8, v10
+; CHECK-NEXT:    vmv2r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
+  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl2)
+  ret <vscale x 4 x i32> %w
+}
+
+; We can't reduce the VL as %v is used as a passthru, i.e. the elements past VL
+; are demanded.
+define <vscale x 4 x i32> @different_imm_vl_as_passthru(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
+; CHECK-LABEL: different_imm_vl_as_passthru:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
-; CHECK-NEXT:    vmv2r.v v14, v10
-; CHECK-NEXT:    vadd.vv v14, v10, v12
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    vadd.vv v12, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m2, tu, ma
-; CHECK-NEXT:    vadd.vv v8, v14, v10
+; CHECK-NEXT:    vadd.vv v12, v8, v10
+; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
-  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
+  %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
   ret <vscale x 4 x i32> %w
 }