From c0bbaf5171506f4d32cb914d90d216a0571522e2 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 15 Jan 2025 11:44:37 -0800
Subject: [PATCH 1/5] [RISCV] Fold vp.reverse(vp.load(ADDR, MASK)) ->
 vp.strided.load(ADDR, -1, MASK).

This was extracted from our downstream with only a quick review. It was
originally written 1.5 years ago so there might be existing helper
functions added since then that could simplify it.

Co-authored-by: Brandon Wu
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  65 ++++++++++
 .../RISCV/rvv/vp-combine-reverse-load.ll      | 114 ++++++++++++++++++
 2 files changed, 179 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b25cb128bce9f..8b879893b5960 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16229,6 +16229,69 @@ static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
 }
 
+static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
+                                        const RISCVSubtarget &Subtarget) {
+  // Fold:
+  //   vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
+
+  // Check if its first operand is a vp.load.
+  auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
+  if (!VPLoad)
+    return SDValue();
+
+  EVT LoadVT = VPLoad->getValueType(0);
+  // We do not have a strided_load version for masks, and the evl of vp.reverse
+  // and vp.load should always be the same.
+  if (!LoadVT.getVectorElementType().isByteSized() ||
+      N->getOperand(2) != VPLoad->getVectorLength() ||
+      !N->getOperand(0).hasOneUse())
+    return SDValue();
+
+  // Check if the mask of outer vp.reverse are all 1's.
+  if (!isOneOrOneSplat(N->getOperand(1)))
+    return SDValue();
+
+  SDValue LoadMask = VPLoad->getMask();
+  // If Mask is not all 1's, try to replace the mask if it's opcode
+  // is EXPERIMENTAL_VP_REVERSE and it's operand can be directly extracted.
+  if (!isOneOrOneSplat(LoadMask)) {
+    // Check if the mask of vp.reverse in vp.load are all 1's and
+    // the length of mask is same as evl.
+    if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
+        !isOneOrOneSplat(LoadMask.getOperand(1)) ||
+        LoadMask.getOperand(2) != VPLoad->getVectorLength())
+      return SDValue();
+    LoadMask = LoadMask.getOperand(0);
+  }
+
+  // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
+  SDLoc DL(N);
+  MVT XLenVT = Subtarget.getXLenVT();
+  SDValue NumElem = VPLoad->getVectorLength();
+  uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
+
+  SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
+                              DAG.getConstant(1, DL, XLenVT));
+  SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
+                              DAG.getConstant(ElemWidthByte, DL, XLenVT));
+  SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
+  SDValue Stride = DAG.getConstant(0 - ElemWidthByte, DL, XLenVT);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      PtrInfo, VPLoad->getMemOperand()->getFlags(),
+      LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
+
+  SDValue Ret = DAG.getStridedLoadVP(
+      LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
+      VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
+
+  DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
+
+  return Ret;
+}
+
 // Convert from one FMA opcode to another based on whether we are negating the
 // multiply result and/or the accumulator.
 // NOTE: Only supports RVV operations with VL.
@@ -18372,6 +18435,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       }
     }
   }
+  case ISD::EXPERIMENTAL_VP_REVERSE:
+    return performVP_REVERSECombine(N, DAG, Subtarget);
   case ISD::BITCAST: {
     assert(Subtarget.useRVVForFixedLengthVectors());
     SDValue N0 = N->getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
new file mode 100644
index 0000000000000..b0604d51f93b3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+f,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 2 x float> @test_reverse_load_combiner(<vscale x 2 x float>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: test_reverse_load_combiner:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a2, a1, 2
+; CHECK-NEXT:    add a0, a2, a0
+; CHECK-NEXT:    addi a0, a0, -4
+; CHECK-NEXT:    li a2, -4
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a2
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %allones, i32 %evl)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %allones, i32 %evl)
+  ret <vscale x 2 x float> %rev
+}
+
+define <vscale x 2 x float> @test_load_mask_is_vp_reverse(<vscale x 2 x float>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: test_load_mask_is_vp_reverse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    li a2, 1
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a2
+; CHECK-NEXT:    slli a2, a1, 2
+; CHECK-NEXT:    add a0, a2, a0
+; CHECK-NEXT:    li a2, -4
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    addi a0, a0, -4
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a2, v0.t
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+  %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+  %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %head, <vscale x 2 x i1> %allones, i32 %evl)
+  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %loadmask, i32 %evl)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %allones, i32 %evl)
+  ret <vscale x 2 x float> %rev
+}
+
+define <vscale x 2 x float> @test_load_mask_not_all_one(<vscale x 2 x float>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: test_load_mask_not_all_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmclr.m v0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v9, (a0), v0.t
+; CHECK-NEXT:    vid.v v8, v0.t
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    vrsub.vx v10, v8, a1, v0.t
+; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 1
+  %notallones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %notallones, i32 %evl)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %notallones, i32 %evl)
+  ret <vscale x 2 x float> %rev
+}
+
+define <vscale x 2 x float> @test_differnet_evl(<vscale x 2 x float>* %ptr, i32 zeroext %evl1, i32 zeroext %evl2) {
+; CHECK-LABEL: test_differnet_evl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 1
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    vmclr.m v8
+; CHECK-NEXT:    addi a3, a1, -1
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
+; CHECK-NEXT:    vslideup.vi v10, v9, 1
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vid.v v9, v0.t
+; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vrsub.vx v9, v9, a3, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 0
+; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vrgatherei16.vv v11, v10, v9, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v9, (a0), v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vid.v v10, v0.t
+; CHECK-NEXT:    addi a2, a2, -1
+; CHECK-NEXT:    vrsub.vx v10, v10, a2, v0.t
+; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 1
+  %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+  %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %head, <vscale x 2 x i1> %allones, i32 %evl1)
+  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %loadmask, i32 %evl2)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %allones, i32 %evl2)
+  ret <vscale x 2 x float> %rev
+}
+
+declare <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* nocapture, <vscale x 2 x i1>, i32)
+declare <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+declare <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)

From 58dadc92b5f3f268b01dd7329560664c2b41958b Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 15 Jan 2025 12:08:24 -0800
Subject: [PATCH 2/5] fixup! remove undef from tests.
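
The hand-built all-ones mask the tests drop here and the splat shorthand
that replaces it denote the same all-ones constant; a minimal sketch of
the two idioms for reference (illustrative only, not copied verbatim from
the test file):

    ; old idiom: broadcast element 0 (i1 true) to every lane
    %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
    %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
    ; new shorthand: the same all-ones constant written inline
    ;   <vscale x 2 x i1> splat (i1 true)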
---
 .../RISCV/rvv/vp-combine-reverse-load.ll      | 79 ++++++-------------
 1 file changed, 22 insertions(+), 57 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
index b0604d51f93b3..50e26bd141070 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll
@@ -11,46 +11,30 @@ define <vscale x 2 x float> @test_reverse_load_combiner(<vscale x 2 x float>* %p
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v8, (a0), a2
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
-  %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
-
-  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %allones, i32 %evl)
-  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %allones, i32 %evl)
+  %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x float> %rev
 }
 
-define <vscale x 2 x float> @test_load_mask_is_vp_reverse(<vscale x 2 x float>* %ptr, i32 zeroext %evl) {
+define <vscale x 2 x float> @test_load_mask_is_vp_reverse(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
 ; CHECK-LABEL: test_load_mask_is_vp_reverse:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    li a2, 1
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a2
 ; CHECK-NEXT:    slli a2, a1, 2
 ; CHECK-NEXT:    add a0, a2, a0
-; CHECK-NEXT:    li a2, -4
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vand.vi v8, v8, 1
-; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    addi a0, a0, -4
+; CHECK-NEXT:    li a2, -4
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlse32.v v8, (a0), a2, v0.t
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
-  %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
-
-  %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %head, <vscale x 2 x i1> %allones, i32 %evl)
+  %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %loadmask, i32 %evl)
-  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %allones, i32 %evl)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x float> %rev
 }
 
-define <vscale x 2 x float> @test_load_mask_not_all_one(<vscale x 2 x float>* %ptr, i32 zeroext %evl) {
+define <vscale x 2 x float> @test_load_mask_not_all_one(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %notallones, i32 zeroext %evl) {
 ; CHECK-LABEL: test_load_mask_not_all_one:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v9, (a0), v0.t
 ; CHECK-NEXT:    vid.v v8, v0.t
@@ -58,54 +42,35 @@ define <vscale x 2 x float> @test_load_mask_not_all_one(<vscale x 2 x float>* %p
 ; CHECK-NEXT:    vrsub.vx v10, v8, a1, v0.t
 ; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 1
-  %notallones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
-
   %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %notallones, i32 %evl)
   %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %notallones, i32 %evl)
   ret <vscale x 2 x float> %rev
 }
 
-define <vscale x 2 x float> @test_differnet_evl(<vscale x 2 x float>* %ptr, i32 zeroext %evl1, i32 zeroext %evl2) {
-; CHECK-LABEL: test_differnet_evl:
+define <vscale x 2 x float> @test_different_evl(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %mask, i32 zeroext %evl1, i32 zeroext %evl2) {
+; CHECK-LABEL: test_different_evl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 1
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vmclr.m v8
 ; CHECK-NEXT:    addi a3, a1, -1
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
-; CHECK-NEXT:    vslideup.vi v10, v9, 1
-; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vid.v v9, v0.t
-; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
-; CHECK-NEXT:    vand.vi v10, v10, 1
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vrsub.vx v9, v9, a3, v0.t
-; CHECK-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vrsub.vx v8, v8, a3
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT:    vrgatherei16.vv v10, v9, v8
 ; CHECK-NEXT:    vmsne.vi v0, v10, 0
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v10, 0
-; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vrgatherei16.vv v11, v10, v9, v0.t
-; CHECK-NEXT:    vmsne.vi v0, v11, 0, v0.t
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v9, (a0), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vid.v v10, v0.t
 ; CHECK-NEXT:    addi a2, a2, -1
-; CHECK-NEXT:    vrsub.vx v10, v10, a2, v0.t
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vrsub.vx v10, v8, a2
+; CHECK-NEXT:    vrgather.vv v8, v9, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 1
-  %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
-
-  %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %head, <vscale x 2 x i1> %allones, i32 %evl1)
+  %loadmask = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> splat (i1 true), i32 %evl1)
   %load = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0nxv2f32(<vscale x 2 x float>* %ptr, <vscale x 2 x i1> %loadmask, i32 %evl2)
-  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> %allones, i32 %evl2)
+  %rev = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %load, <vscale x 2 x i1> splat (i1 true), i32 %evl2)
   ret <vscale x 2 x float> %rev
 }

From e045e5d68aca7e3788664b843ef26ee910cfb329 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 15 Jan 2025 18:09:31 -0800
Subject: [PATCH 3/5] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Co-authored-by: Luke Lau
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b879893b5960..aaaccc7cf814a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16252,8 +16252,8 @@ static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   SDValue LoadMask = VPLoad->getMask();
-  // If Mask is not all 1's, try to replace the mask if it's opcode
-  // is EXPERIMENTAL_VP_REVERSE and it's operand can be directly extracted.
+  // If Mask is not all 1's, try to replace the mask if its opcode
+  // is EXPERIMENTAL_VP_REVERSE and its operand can be directly extracted.
   if (!isOneOrOneSplat(LoadMask)) {
     // Check if the mask of vp.reverse in vp.load are all 1's and
     // the length of mask is same as evl.

From 167e6a9a9218a071882518abfc7df2da7d310e87 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 15 Jan 2025 20:27:46 -0800
Subject: [PATCH 4/5] fixup! address review comment.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index aaaccc7cf814a..002ac452b3140 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16275,7 +16275,7 @@ static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
   SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
                               DAG.getConstant(ElemWidthByte, DL, XLenVT));
   SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
-  SDValue Stride = DAG.getConstant(0 - ElemWidthByte, DL, XLenVT);
+  SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
 
   MachineFunction &MF = DAG.getMachineFunction();
   MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());

From 4c4232fbbc1e8e6d40398248fdb73299b64f5b33 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 15 Jan 2025 20:31:58 -0800
Subject: [PATCH 5/5] fixup! improve comments

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 002ac452b3140..f8a5ccc3023a4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16252,11 +16252,10 @@ static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   SDValue LoadMask = VPLoad->getMask();
-  // If Mask is not all 1's, try to replace the mask if its opcode
-  // is EXPERIMENTAL_VP_REVERSE and its operand can be directly extracted.
+  // If Mask is all ones, then load is unmasked and can be reversed.
   if (!isOneOrOneSplat(LoadMask)) {
-    // Check if the mask of vp.reverse in vp.load are all 1's and
-    // the length of mask is same as evl.
+    // If the mask is not all ones, we can reverse the load if the mask was also
+    // reversed by an unmasked vp.reverse with the same EVL.
     if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
         !isOneOrOneSplat(LoadMask.getOperand(1)) ||
         LoadMask.getOperand(2) != VPLoad->getVectorLength())
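
Taken together, the combine turns a reversed unit-stride VP load into a
single strided load with a negative stride. A minimal sketch of the
equivalence at the IR level (illustrative only: the combine actually runs
on SelectionDAG nodes during lowering, the function names below are
hypothetical, and opaque-pointer syntax is used rather than the typed
pointers in the test above):

; Matched pattern: f32 elements (4 bytes each), EVL elements loaded then reversed.
define <vscale x 2 x float> @reverse_of_vp_load(ptr %ptr, i32 zeroext %evl) {
  %v = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  %r = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %v, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %r
}

; Strided form the combine produces:
;   Base   = %ptr + (EVL - 1) * 4   ; address of the last element that was loaded
;   Stride = -4                     ; walk back toward %ptr
; e.g. with EVL = 5 the strided load reads %ptr+16, +12, +8, +4, +0.
define <vscale x 2 x float> @strided_equivalent(ptr %ptr, i32 zeroext %evl) {
  %evl64 = zext i32 %evl to i64
  %last = sub i64 %evl64, 1
  %base = getelementptr inbounds float, ptr %ptr, i64 %last
  %r = call <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i64(ptr %base, i64 -4, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x float> %r
}

declare <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.experimental.vp.strided.load.nxv2f32.p0.i64(ptr, i64, <vscale x 2 x i1>, i32)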