diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 7b9f544a5f9a4..71aba323bf04b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2585,6 +2585,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { : RTLIB::getLDEXP(N->getValueType(0)); if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // Scalarize vector FPOWI instead of promoting the type. This allows the + // scalar FPOWIs to be visited and converted to libcalls before promoting + // the type. + // FIXME: This should be done in LegalizeVectorOps/LegalizeDAG, but call + // lowering needs the unpromoted EVT. + if (IsPowI && N->getValueType(0).isVector()) + return DAG.UnrollVectorOp(N); SmallVector NewOps(N->ops()); NewOps[1 + OpOffset] = SExtPromotedInteger(N->getOperand(1 + OpOffset)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll new file mode 100644 index 0000000000000..f6b14a9bb000f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32) + +define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload 
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7 +; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %va, i32 %b) + ret <8 x float> %res +} + +declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32) + +define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -80 +; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2 +; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill +; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3 +; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 80 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %va, i32 %b) + ret 
<4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll new file mode 100644 index 0000000000000..b0f54e78c7a44 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fpowi.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) + +define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powisf2) +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %va, i32 %b) + ret <4 x float> %res +} + +declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) + +define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind { +; CHECK-LABEL: powi_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -48 +; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill +; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill +; CHECK-NEXT: addi.w $fp, $a0, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill +; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: bl %plt(__powidf2) +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: vld 
$vr0, $sp, 16 # 16-byte Folded Reload +; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1 +; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 48 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %va, i32 %b) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll new file mode 100644 index 0000000000000..c6b8b602718b7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll @@ -0,0 +1,1251 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -mattr=+v,+f,+d -target-abi=ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+f,+d -target-abi=lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=RV64 + +define <1 x float> @powi_v1f32(<1 x float> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v1f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.s.f v8, fa0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v1f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.s.f v8, fa0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call <1 x float> @llvm.powi.v1f32.i32(<1 x float> %x, i32 %y) + ret <1 x float> %a +} +declare <1 x float> @llvm.powi.v1f32.i32(<1 x float>, i32) + +define <2 x float> @powi_v2f32(<2 x float> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v2f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v2f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: addi a1, sp, 32 +; RV64-NEXT: vs1r.v v8, (a1) # 
Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %x, i32 %y) + ret <2 x float> %a +} +declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) + +define <3 x float> @powi_v3f32(<3 x float> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v3f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v3f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: flw fa0, 32(a0) # 
8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = call <3 x float> @llvm.powi.v3f32.i32(<3 x float> %x, i32 %y) + ret <3 x float> %a +} +declare <3 x float> @llvm.powi.v3f32.i32(<3 x float>, i32) + +define <4 x float> @powi_v4f32(<4 x float> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v4f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: flw fa0, 16(a0) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: 
ret +; +; RV64-LABEL: powi_v4f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: flw fa0, 32(a0) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %x, i32 %y) + ret <4 x float> %a +} +declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32) + +define <8 x float> @powi_v8f32(<8 x float> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v8f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v10 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fmv.s fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 4 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 6 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 
28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v8f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v10 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fmv.s fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 4 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 5 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, 
ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 6 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 7 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %x, i32 %y) + ret <8 x float> %a +} +declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32) + +define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v16f32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -272 +; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: addi s0, sp, 272 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: flw fa0, 124(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 188(sp) +; RV32-NEXT: flw fa0, 120(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 184(sp) +; RV32-NEXT: flw fa0, 116(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 180(sp) +; RV32-NEXT: flw fa0, 112(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 176(sp) +; RV32-NEXT: flw fa0, 108(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 172(sp) +; RV32-NEXT: flw fa0, 104(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 168(sp) +; RV32-NEXT: flw fa0, 100(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 164(sp) +; RV32-NEXT: flw fa0, 96(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 160(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 128(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; 
RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 140(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 136(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 132(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 156(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 6 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 152(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 148(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 4 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powisf2 +; RV32-NEXT: fsw fa0, 144(sp) +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: addi sp, s0, -272 +; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 272 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v16f32: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -272 +; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: addi s0, sp, 272 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: addi a1, sp, 240 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 64 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vse32.v v8, (a1) +; RV64-NEXT: flw fa0, 124(sp) +; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 188(sp) +; RV64-NEXT: flw fa0, 120(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 184(sp) +; RV64-NEXT: flw fa0, 116(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 180(sp) +; RV64-NEXT: flw fa0, 112(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 176(sp) +; RV64-NEXT: flw fa0, 108(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 172(sp) +; RV64-NEXT: flw fa0, 104(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 168(sp) +; 
RV64-NEXT: flw fa0, 100(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 164(sp) +; RV64-NEXT: flw fa0, 96(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 160(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 128(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 140(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 136(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 132(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 7 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 156(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 6 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 152(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 5 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 148(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 4 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powisf2 +; RV64-NEXT: fsw fa0, 144(sp) +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: addi sp, s0, -272 +; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 272 +; RV64-NEXT: ret + %a = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> %x, i32 %y) + ret <16 x float> %a +} +declare <16 x float> @llvm.powi.v16f32.i32(<16 x float>, i32) + +define <1 x double> @powi_v1f64(<1 x double> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v1f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.s.f v8, fa0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v1f64: +; RV64: # %bb.0: 
+; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.s.f v8, fa0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call <1 x double> @llvm.powi.v1f64.i32(<1 x double> %x, i32 %y) + ret <1 x double> %a +} +declare <1 x double> @llvm.powi.v1f64.i32(<1 x double>, i32) + +define <2 x double> @powi_v2f64(<2 x double> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v2f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v9, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v9 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fmv.d fs0, fa0 +; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v2f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: addi a1, sp, 32 +; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v9, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v9 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fmv.d fs0, fa0 +; RV64-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %x, i32 %y) + ret <2 x double> %a +} +declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32) + +define <4 x double> @powi_v4f64(<4 x double> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v4f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e64, 
m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v10 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fmv.d fs0, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfmv.v.f v8, fa0 +; RV32-NEXT: vfslide1down.vf v8, v8, fs0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vfslide1down.vf v8, v8, fa0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v4f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: fsd fs0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: sext.w s0, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v10 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fmv.d fs0, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfmv.v.f v8, fa0 +; RV64-NEXT: vfslide1down.vf v8, v8, fs0 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, 
(a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: addi a0, sp, 32 +; RV64-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vfslide1down.vf v8, v8, fa0 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: fld fs0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %a = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %x, i32 %y) + ret <4 x double> %a +} +declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32) + +define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) nounwind { +; RV32-LABEL: powi_v8f64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -272 +; RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 260(sp) # 4-byte Folded Spill +; RV32-NEXT: addi s0, sp, 272 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: andi sp, sp, -64 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 64 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: fld fa0, 120(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 184(sp) +; RV32-NEXT: fld fa0, 112(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 176(sp) +; RV32-NEXT: fld fa0, 104(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 168(sp) +; RV32-NEXT: fld fa0, 96(sp) +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 160(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 128(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 136(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 152(sp) +; RV32-NEXT: addi a0, sp, 256 +; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 2 +; RV32-NEXT: vfmv.f.s fa0, v8 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __powidf2 +; RV32-NEXT: fsd fa0, 144(sp) +; RV32-NEXT: addi a0, sp, 128 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: addi sp, s0, -272 +; RV32-NEXT: lw ra, 268(sp) # 4-byte Folded 
Reload +; RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 260(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 272 +; RV32-NEXT: ret +; +; RV64-LABEL: powi_v8f64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -272 +; RV64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 256(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 248(sp) # 8-byte Folded Spill +; RV64-NEXT: addi s0, sp, 272 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: andi sp, sp, -64 +; RV64-NEXT: addi a1, sp, 240 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 64 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: fld fa0, 120(sp) +; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 184(sp) +; RV64-NEXT: fld fa0, 112(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 176(sp) +; RV64-NEXT: fld fa0, 104(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 168(sp) +; RV64-NEXT: fld fa0, 96(sp) +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 160(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 128(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 136(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 3 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 152(sp) +; RV64-NEXT: addi a0, sp, 240 +; RV64-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: vfmv.f.s fa0, v8 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __powidf2 +; RV64-NEXT: fsd fa0, 144(sp) +; RV64-NEXT: addi a0, sp, 128 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: addi sp, s0, -272 +; RV64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 256(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 248(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 272 +; RV64-NEXT: ret + %a = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> %x, i32 %y) + ret <8 x double> %a +} +declare <8 x double> @llvm.powi.v8f64.i32(<8 x double>, i32)