diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2c939967a5e1d..7ee28d08d556c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8622,8 +8622,16 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
   // If MinMax is NaN, let's quiet it.
   if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
       !DAG.isKnownNeverNaN(RHS)) {
-    SDValue MinMaxQuiet =
-        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    SDValue MinMaxQuiet;
+    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
+      MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    } else {
+      // MIPS pre-R5 and HPPA use a different encoding of qNaN and sNaN.
+      // ISD::FCANONICALIZE is supported by MIPS.
+      // HPPA is not supported by LLVM yet.
+      MinMaxQuiet =
+          DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
+    }
     MinMax = DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax,
                              ISD::SETUO);
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11c9a992cbdee..e929d98e95971 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -608,6 +608,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FMAXNUM, VT, Action);
     setOperationAction(ISD::FMINIMUM, VT, Action);
     setOperationAction(ISD::FMAXIMUM, VT, Action);
+    setOperationAction(ISD::FCANONICALIZE, VT, Action);
     setOperationAction(ISD::FSIN, VT, Action);
     setOperationAction(ISD::FCOS, VT, Action);
     setOperationAction(ISD::FSINCOS, VT, Action);
@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , VT, Expand);
     setOperationAction(ISD::FCOS   , VT, Expand);
     setOperationAction(ISD::FSINCOS, VT, Expand);
+
+    setOperationAction(ISD::FCANONICALIZE, VT, Expand);
   }
 
   // Half type will be promoted by default.
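For reference, the semantics the expanded sequence has to preserve: llvm.maximumnum/llvm.minimumnum follow IEEE 754-2019 maximumNumber/minimumNumber, so a NaN operand is treated as missing data, a (quiet) NaN is returned only when both operands are NaN, and -0.0 orders below +0.0. A minimal stand-alone C++ sketch of that behaviour (illustrative only, not the SelectionDAG expansion itself; the helper names are invented and exception flags are ignored):

#include <cmath>

// Reference model of maximumnum: NaN acts as missing data, signed zeros
// are ordered, and a quiet NaN is returned only if both inputs are NaN.
static double maximumnum_ref(double x, double y) {
  if (std::isnan(x)) x = y;                 // drop a NaN operand
  if (std::isnan(y)) y = x;
  if (std::isnan(x)) return std::nan("");   // both NaN -> quiet NaN
  if (x == 0.0 && y == 0.0)                 // +0.0 orders above -0.0
    return std::signbit(x) ? y : x;
  return x > y ? x : y;
}

static double minimumnum_ref(double x, double y) {
  if (std::isnan(x)) x = y;
  if (std::isnan(y)) y = x;
  if (std::isnan(x)) return std::nan("");
  if (x == 0.0 && y == 0.0)                 // -0.0 orders below +0.0
    return std::signbit(x) ? x : y;
  return x < y ? x : y;
}

The sign-bit tests (testl/js, negl/jo) and the compare against zero visible in the checked x86 sequences below correspond to the signed-zero handling in this model.
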
diff --git a/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll new file mode 100644 index 0000000000000..6dd7e582fae0b --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll @@ -0,0 +1,1363 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=x86_64 < %s | FileCheck %s --check-prefix=X64 +; RUN: llc --mtriple=x86_64 --mattr=+avx < %s | FileCheck %s --check-prefix=X64AVX +; RUN: llc --mtriple=x86_64 --mattr=+avx512fp16 < %s | FileCheck %s --check-prefix=X64AVX512FP16 + +declare float @llvm.maximumnum.f32(float, float) +declare double @llvm.maximumnum.f64(double, double) +declare float @llvm.minimumnum.f32(float, float) +declare double @llvm.minimumnum.f64(double, double) + +define float @maximumnum_float(float %x, float %y) { +; X64-LABEL: maximumnum_float: +; X64: # %bb.0: +; X64-NEXT: movaps %xmm0, %xmm2 +; X64-NEXT: cmpunordss %xmm0, %xmm2 +; X64-NEXT: movaps %xmm2, %xmm3 +; X64-NEXT: andps %xmm1, %xmm3 +; X64-NEXT: andnps %xmm0, %xmm2 +; X64-NEXT: orps %xmm3, %xmm2 +; X64-NEXT: movaps %xmm1, %xmm3 +; X64-NEXT: cmpunordss %xmm1, %xmm3 +; X64-NEXT: movaps %xmm3, %xmm0 +; X64-NEXT: andps %xmm2, %xmm0 +; X64-NEXT: andnps %xmm1, %xmm3 +; X64-NEXT: orps %xmm0, %xmm3 +; X64-NEXT: movaps %xmm3, %xmm0 +; X64-NEXT: cmpltss %xmm2, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: andps %xmm2, %xmm1 +; X64-NEXT: andnps %xmm3, %xmm0 +; X64-NEXT: orps %xmm1, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: cmpunordss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: andps %xmm1, %xmm4 +; X64-NEXT: andnps %xmm0, %xmm1 +; X64-NEXT: orps %xmm1, %xmm4 +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: cmpeqss %xmm4, %xmm1 +; X64-NEXT: movd %xmm2, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: je .LBB0_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movaps %xmm4, %xmm2 +; X64-NEXT: .LBB0_2: +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: andnps %xmm4, %xmm0 +; X64-NEXT: movd %xmm3, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: je .LBB0_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: movaps %xmm2, %xmm3 +; X64-NEXT: .LBB0_4: +; X64-NEXT: andps %xmm3, %xmm1 +; X64-NEXT: orps %xmm1, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: maximumnum_float: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm2 +; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm0 +; X64AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpltss %xmm2, %xmm0, %xmm1 +; X64AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm1 +; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3 +; X64AVX-NEXT: vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64AVX-NEXT: vmovd %xmm2, %eax +; X64AVX-NEXT: testl %eax, %eax +; X64AVX-NEXT: je .LBB0_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX-NEXT: .LBB0_2: +; X64AVX-NEXT: vcmpeqss %xmm3, %xmm1, %xmm3 +; X64AVX-NEXT: vmovd %xmm0, %eax +; X64AVX-NEXT: testl %eax, %eax +; X64AVX-NEXT: je .LBB0_4 +; X64AVX-NEXT: # %bb.3: +; X64AVX-NEXT: vmovaps %xmm2, %xmm0 +; X64AVX-NEXT: .LBB0_4: +; X64AVX-NEXT: vblendvps %xmm3, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: maximumnum_float: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 
{%k1} +; X64AVX512FP16-NEXT: vcmpltss %xmm0, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64AVX512FP16-NEXT: vcmpeqss %xmm3, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovd %xmm0, %eax +; X64AVX512FP16-NEXT: testl %eax, %eax +; X64AVX512FP16-NEXT: sete %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovd %xmm1, %eax +; X64AVX512FP16-NEXT: testl %eax, %eax +; X64AVX512FP16-NEXT: sete %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + +define float @maximumnum_float_nsz(float %x, float %y) { +; X64-LABEL: maximumnum_float_nsz: +; X64: # %bb.0: +; X64-NEXT: movaps %xmm0, %xmm2 +; X64-NEXT: cmpunordss %xmm0, %xmm2 +; X64-NEXT: movaps %xmm2, %xmm3 +; X64-NEXT: andps %xmm1, %xmm3 +; X64-NEXT: andnps %xmm0, %xmm2 +; X64-NEXT: orps %xmm3, %xmm2 +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: cmpunordss %xmm1, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm3 +; X64-NEXT: andps %xmm2, %xmm3 +; X64-NEXT: andnps %xmm1, %xmm0 +; X64-NEXT: orps %xmm3, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: cmpltss %xmm2, %xmm1 +; X64-NEXT: andps %xmm1, %xmm2 +; X64-NEXT: andnps %xmm0, %xmm1 +; X64-NEXT: orps %xmm2, %xmm1 +; X64-NEXT: movaps %xmm1, %xmm2 +; X64-NEXT: cmpunordss %xmm1, %xmm2 +; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: andps %xmm2, %xmm0 +; X64-NEXT: andnps %xmm1, %xmm2 +; X64-NEXT: orps %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: maximumnum_float_nsz: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1 +; X64AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; X64AVX-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: maximumnum_float_nsz: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltss %xmm0, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nsz float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + +define float @maximumnum_float_nnan(float %x, float %y) { +; X64-LABEL: maximumnum_float_nnan: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: js .LBB2_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: jmp .LBB2_3 +; X64-NEXT: .LBB2_1: +; X64-NEXT: movdqa %xmm1, %xmm2 +; X64-NEXT: movdqa %xmm0, %xmm1 +; X64-NEXT: 
.LBB2_3: +; X64-NEXT: maxss %xmm2, %xmm1 +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: maximumnum_float_nnan: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vmovd %xmm0, %eax +; X64AVX-NEXT: testl %eax, %eax +; X64AVX-NEXT: js .LBB2_1 +; X64AVX-NEXT: # %bb.2: +; X64AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; X64AVX-NEXT: .LBB2_1: +; X64AVX-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX-NEXT: vmaxss %xmm2, %xmm0, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: maximumnum_float_nnan: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vfpclassss $3, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vmaxss %xmm2, %xmm0, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nnan float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + + +define double @maximumnum_double(double %x, double %y) { +; X64-LABEL: maximumnum_double: +; X64: # %bb.0: +; X64-NEXT: movapd %xmm0, %xmm2 +; X64-NEXT: cmpunordsd %xmm0, %xmm2 +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: andpd %xmm1, %xmm3 +; X64-NEXT: andnpd %xmm0, %xmm2 +; X64-NEXT: orpd %xmm3, %xmm2 +; X64-NEXT: movapd %xmm1, %xmm3 +; X64-NEXT: cmpunordsd %xmm1, %xmm3 +; X64-NEXT: movapd %xmm3, %xmm0 +; X64-NEXT: andpd %xmm2, %xmm0 +; X64-NEXT: andnpd %xmm1, %xmm3 +; X64-NEXT: orpd %xmm0, %xmm3 +; X64-NEXT: movapd %xmm3, %xmm0 +; X64-NEXT: cmpltsd %xmm2, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm1 +; X64-NEXT: andpd %xmm2, %xmm1 +; X64-NEXT: andnpd %xmm3, %xmm0 +; X64-NEXT: orpd %xmm1, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm1 +; X64-NEXT: cmpunordsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm4 = [NaN,0.0E+0] +; X64-NEXT: andpd %xmm1, %xmm4 +; X64-NEXT: andnpd %xmm0, %xmm1 +; X64-NEXT: orpd %xmm1, %xmm4 +; X64-NEXT: xorpd %xmm1, %xmm1 +; X64-NEXT: cmpeqsd %xmm4, %xmm1 +; X64-NEXT: movq %xmm2, %rax +; X64-NEXT: testq %rax, %rax +; X64-NEXT: je .LBB3_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movapd %xmm4, %xmm2 +; X64-NEXT: .LBB3_2: +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: andnpd %xmm4, %xmm0 +; X64-NEXT: movq %xmm3, %rax +; X64-NEXT: testq %rax, %rax +; X64-NEXT: je .LBB3_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: .LBB3_4: +; X64-NEXT: andpd %xmm3, %xmm1 +; X64-NEXT: orpd %xmm1, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: maximumnum_double: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm2 +; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm0 +; X64AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpltsd %xmm2, %xmm0, %xmm1 +; X64AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm1 +; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm3 +; X64AVX-NEXT: vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64AVX-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; X64AVX-NEXT: vmovq %xmm2, %rax +; X64AVX-NEXT: testq %rax, %rax +; X64AVX-NEXT: je .LBB3_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX-NEXT: .LBB3_2: +; X64AVX-NEXT: vcmpeqsd %xmm3, %xmm1, %xmm3 +; X64AVX-NEXT: vmovq %xmm0, %rax +; X64AVX-NEXT: testq %rax, %rax +; X64AVX-NEXT: je .LBB3_4 +; X64AVX-NEXT: # %bb.3: +; X64AVX-NEXT: vmovapd %xmm2, %xmm0 +; X64AVX-NEXT: .LBB3_4: +; X64AVX-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: maximumnum_double: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; 
X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltsd %xmm0, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0] +; X64AVX512FP16-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; X64AVX512FP16-NEXT: vcmpeqsd %xmm3, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovq %xmm0, %rax +; X64AVX512FP16-NEXT: testq %rax, %rax +; X64AVX512FP16-NEXT: sete %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm3 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovq %xmm1, %rax +; X64AVX512FP16-NEXT: testq %rax, %rax +; X64AVX512FP16-NEXT: sete %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define double @maximumnum_double_nsz(double %x, double %y) { +; X64-LABEL: maximumnum_double_nsz: +; X64: # %bb.0: +; X64-NEXT: movapd %xmm0, %xmm2 +; X64-NEXT: cmpunordsd %xmm0, %xmm2 +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: andpd %xmm1, %xmm3 +; X64-NEXT: andnpd %xmm0, %xmm2 +; X64-NEXT: orpd %xmm3, %xmm2 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: cmpunordsd %xmm1, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm3 +; X64-NEXT: andpd %xmm2, %xmm3 +; X64-NEXT: andnpd %xmm1, %xmm0 +; X64-NEXT: orpd %xmm3, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm1 +; X64-NEXT: cmpltsd %xmm2, %xmm1 +; X64-NEXT: andpd %xmm1, %xmm2 +; X64-NEXT: andnpd %xmm0, %xmm1 +; X64-NEXT: orpd %xmm2, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm2 +; X64-NEXT: cmpunordsd %xmm1, %xmm2 +; X64-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; X64-NEXT: andpd %xmm2, %xmm0 +; X64-NEXT: andnpd %xmm1, %xmm2 +; X64-NEXT: orpd %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: maximumnum_double_nsz: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; X64AVX-NEXT: vcmpltsd %xmm0, %xmm1, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1 +; X64AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: maximumnum_double_nsz: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltsd %xmm0, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0] +; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nsz double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define double @maximumnum_double_nnan(double %x, double %y) { +; X64-LABEL: maximumnum_double_nnan: +; X64: # %bb.0: +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: testq %rax, %rax +; X64-NEXT: js .LBB5_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: jmp .LBB5_3 +; X64-NEXT: .LBB5_1: +; 
X64-NEXT: movdqa %xmm1, %xmm2 +; X64-NEXT: movdqa %xmm0, %xmm1 +; X64-NEXT: .LBB5_3: +; X64-NEXT: maxsd %xmm2, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: maximumnum_double_nnan: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vmovq %xmm0, %rax +; X64AVX-NEXT: testq %rax, %rax +; X64AVX-NEXT: js .LBB5_1 +; X64AVX-NEXT: # %bb.2: +; X64AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; X64AVX-NEXT: .LBB5_1: +; X64AVX-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: maximumnum_double_nnan: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vfpclasssd $3, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vmaxsd %xmm2, %xmm0, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nnan double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define float @minimumnum_float(float %x, float %y) { +; X64-LABEL: minimumnum_float: +; X64: # %bb.0: +; X64-NEXT: movaps %xmm0, %xmm2 +; X64-NEXT: cmpunordss %xmm0, %xmm2 +; X64-NEXT: movaps %xmm2, %xmm3 +; X64-NEXT: andps %xmm1, %xmm3 +; X64-NEXT: andnps %xmm0, %xmm2 +; X64-NEXT: orps %xmm3, %xmm2 +; X64-NEXT: movaps %xmm1, %xmm3 +; X64-NEXT: cmpunordss %xmm1, %xmm3 +; X64-NEXT: movaps %xmm3, %xmm0 +; X64-NEXT: andps %xmm2, %xmm0 +; X64-NEXT: andnps %xmm1, %xmm3 +; X64-NEXT: orps %xmm0, %xmm3 +; X64-NEXT: movaps %xmm2, %xmm0 +; X64-NEXT: cmpltss %xmm3, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: andps %xmm2, %xmm1 +; X64-NEXT: andnps %xmm3, %xmm0 +; X64-NEXT: orps %xmm1, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm1 +; X64-NEXT: cmpunordss %xmm0, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: andps %xmm1, %xmm4 +; X64-NEXT: andnps %xmm0, %xmm1 +; X64-NEXT: orps %xmm1, %xmm4 +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: cmpeqss %xmm4, %xmm1 +; X64-NEXT: movd %xmm2, %eax +; X64-NEXT: negl %eax +; X64-NEXT: jo .LBB6_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movaps %xmm4, %xmm2 +; X64-NEXT: .LBB6_2: +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: andnps %xmm4, %xmm0 +; X64-NEXT: movd %xmm3, %eax +; X64-NEXT: negl %eax +; X64-NEXT: jo .LBB6_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: movaps %xmm2, %xmm3 +; X64-NEXT: .LBB6_4: +; X64-NEXT: andps %xmm3, %xmm1 +; X64-NEXT: orps %xmm1, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_float: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm2 +; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm0 +; X64AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpltss %xmm0, %xmm2, %xmm1 +; X64AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm1 +; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm3 +; X64AVX-NEXT: vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64AVX-NEXT: vmovd %xmm2, %eax +; X64AVX-NEXT: negl %eax +; X64AVX-NEXT: jo .LBB6_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX-NEXT: .LBB6_2: +; X64AVX-NEXT: vcmpeqss %xmm3, %xmm1, %xmm3 +; X64AVX-NEXT: vmovd %xmm0, %eax +; X64AVX-NEXT: negl %eax +; X64AVX-NEXT: jo .LBB6_4 +; X64AVX-NEXT: # %bb.3: +; X64AVX-NEXT: vmovaps %xmm2, %xmm0 +; X64AVX-NEXT: .LBB6_4: +; X64AVX-NEXT: vblendvps %xmm3, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_float: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1 
+; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm2, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64AVX512FP16-NEXT: vcmpeqss %xmm3, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovd %xmm0, %eax +; X64AVX512FP16-NEXT: negl %eax +; X64AVX512FP16-NEXT: seto %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovd %xmm1, %eax +; X64AVX512FP16-NEXT: negl %eax +; X64AVX512FP16-NEXT: seto %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define float @minimumnum_float_nsz(float %x, float %y) { +; X64-LABEL: minimumnum_float_nsz: +; X64: # %bb.0: +; X64-NEXT: movaps %xmm0, %xmm2 +; X64-NEXT: cmpunordss %xmm0, %xmm2 +; X64-NEXT: movaps %xmm2, %xmm3 +; X64-NEXT: andps %xmm1, %xmm3 +; X64-NEXT: andnps %xmm0, %xmm2 +; X64-NEXT: orps %xmm3, %xmm2 +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: cmpunordss %xmm1, %xmm0 +; X64-NEXT: movaps %xmm0, %xmm3 +; X64-NEXT: andps %xmm2, %xmm3 +; X64-NEXT: andnps %xmm1, %xmm0 +; X64-NEXT: orps %xmm3, %xmm0 +; X64-NEXT: movaps %xmm2, %xmm1 +; X64-NEXT: cmpltss %xmm0, %xmm1 +; X64-NEXT: andps %xmm1, %xmm2 +; X64-NEXT: andnps %xmm0, %xmm1 +; X64-NEXT: orps %xmm2, %xmm1 +; X64-NEXT: movaps %xmm1, %xmm2 +; X64-NEXT: cmpunordss %xmm1, %xmm2 +; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: andps %xmm2, %xmm0 +; X64-NEXT: andnps %xmm1, %xmm2 +; X64-NEXT: orps %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_float_nsz: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; X64AVX-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm1 +; X64AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm1 +; X64AVX-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_float_nsz: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpunordss %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nsz float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define float @minimumnum_float_nnan(float %x, float %y) { +; X64-LABEL: minimumnum_float_nnan: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: js .LBB8_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: minss 
%xmm1, %xmm0 +; X64-NEXT: retq +; X64-NEXT: .LBB8_1: +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: minss %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_float_nnan: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vmovd %xmm0, %eax +; X64AVX-NEXT: testl %eax, %eax +; X64AVX-NEXT: js .LBB8_1 +; X64AVX-NEXT: # %bb.2: +; X64AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 +; X64AVX-NEXT: retq +; X64AVX-NEXT: .LBB8_1: +; X64AVX-NEXT: vmovdqa %xmm0, %xmm2 +; X64AVX-NEXT: vminss %xmm2, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_float_nnan: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vfpclassss $5, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vminss %xmm2, %xmm0, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nnan float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define double @minimumnum_double(double %x, double %y) { +; X64-LABEL: minimumnum_double: +; X64: # %bb.0: +; X64-NEXT: movapd %xmm0, %xmm2 +; X64-NEXT: cmpunordsd %xmm0, %xmm2 +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: andpd %xmm1, %xmm3 +; X64-NEXT: andnpd %xmm0, %xmm2 +; X64-NEXT: orpd %xmm3, %xmm2 +; X64-NEXT: movapd %xmm1, %xmm3 +; X64-NEXT: cmpunordsd %xmm1, %xmm3 +; X64-NEXT: movapd %xmm3, %xmm0 +; X64-NEXT: andpd %xmm2, %xmm0 +; X64-NEXT: andnpd %xmm1, %xmm3 +; X64-NEXT: orpd %xmm0, %xmm3 +; X64-NEXT: movapd %xmm2, %xmm0 +; X64-NEXT: cmpltsd %xmm3, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm1 +; X64-NEXT: andpd %xmm2, %xmm1 +; X64-NEXT: andnpd %xmm3, %xmm0 +; X64-NEXT: orpd %xmm1, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm1 +; X64-NEXT: cmpunordsd %xmm0, %xmm1 +; X64-NEXT: movsd {{.*#+}} xmm4 = [NaN,0.0E+0] +; X64-NEXT: andpd %xmm1, %xmm4 +; X64-NEXT: andnpd %xmm0, %xmm1 +; X64-NEXT: orpd %xmm1, %xmm4 +; X64-NEXT: xorpd %xmm1, %xmm1 +; X64-NEXT: cmpeqsd %xmm4, %xmm1 +; X64-NEXT: movq %xmm2, %rax +; X64-NEXT: negq %rax +; X64-NEXT: jo .LBB9_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movapd %xmm4, %xmm2 +; X64-NEXT: .LBB9_2: +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: andnpd %xmm4, %xmm0 +; X64-NEXT: movq %xmm3, %rax +; X64-NEXT: negq %rax +; X64-NEXT: jo .LBB9_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: .LBB9_4: +; X64-NEXT: andpd %xmm3, %xmm1 +; X64-NEXT: orpd %xmm1, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_double: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm2 +; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm0 +; X64AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpltsd %xmm0, %xmm2, %xmm1 +; X64AVX-NEXT: vblendvpd %xmm1, %xmm2, %xmm0, %xmm1 +; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm3 +; X64AVX-NEXT: vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64AVX-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; X64AVX-NEXT: vmovq %xmm2, %rax +; X64AVX-NEXT: negq %rax +; X64AVX-NEXT: jo .LBB9_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX-NEXT: .LBB9_2: +; X64AVX-NEXT: vcmpeqsd %xmm3, %xmm1, %xmm3 +; X64AVX-NEXT: vmovq %xmm0, %rax +; X64AVX-NEXT: negq %rax +; X64AVX-NEXT: jo .LBB9_4 +; X64AVX-NEXT: # %bb.3: +; X64AVX-NEXT: vmovapd %xmm2, %xmm0 +; X64AVX-NEXT: .LBB9_4: +; X64AVX-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_double: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; 
X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltsd %xmm1, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm2, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0] +; X64AVX512FP16-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; X64AVX512FP16-NEXT: vcmpeqsd %xmm3, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovq %xmm0, %rax +; X64AVX512FP16-NEXT: negq %rax +; X64AVX512FP16-NEXT: seto %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm3 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovq %xmm1, %rax +; X64AVX512FP16-NEXT: negq %rax +; X64AVX512FP16-NEXT: seto %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm3, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovapd %xmm2, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define double @minimumnum_double_nsz(double %x, double %y) { +; X64-LABEL: minimumnum_double_nsz: +; X64: # %bb.0: +; X64-NEXT: movapd %xmm0, %xmm2 +; X64-NEXT: cmpunordsd %xmm0, %xmm2 +; X64-NEXT: movapd %xmm2, %xmm3 +; X64-NEXT: andpd %xmm1, %xmm3 +; X64-NEXT: andnpd %xmm0, %xmm2 +; X64-NEXT: orpd %xmm3, %xmm2 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: cmpunordsd %xmm1, %xmm0 +; X64-NEXT: movapd %xmm0, %xmm3 +; X64-NEXT: andpd %xmm2, %xmm3 +; X64-NEXT: andnpd %xmm1, %xmm0 +; X64-NEXT: orpd %xmm3, %xmm0 +; X64-NEXT: movapd %xmm2, %xmm1 +; X64-NEXT: cmpltsd %xmm0, %xmm1 +; X64-NEXT: andpd %xmm1, %xmm2 +; X64-NEXT: andnpd %xmm0, %xmm1 +; X64-NEXT: orpd %xmm2, %xmm1 +; X64-NEXT: movapd %xmm1, %xmm2 +; X64-NEXT: cmpunordsd %xmm1, %xmm2 +; X64-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; X64-NEXT: andpd %xmm2, %xmm0 +; X64-NEXT: andnpd %xmm1, %xmm2 +; X64-NEXT: orpd %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_double_nsz: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 +; X64AVX-NEXT: vcmpunordsd %xmm1, %xmm1, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 +; X64AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2 +; X64AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; X64AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm1 +; X64AVX-NEXT: vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_double_nsz: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltsd %xmm1, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm1, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsd %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0] +; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nsz double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define double @minimumnum_double_nnan(double %x, double %y) { +; X64-LABEL: minimumnum_double_nnan: +; X64: # %bb.0: +; X64-NEXT: movq %xmm0, %rax +; X64-NEXT: testq %rax, %rax +; X64-NEXT: js .LBB11_1 +; X64-NEXT: # %bb.2: +; X64-NEXT: minsd %xmm1, %xmm0 +; X64-NEXT: 
retq +; X64-NEXT: .LBB11_1: +; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: movapd %xmm1, %xmm0 +; X64-NEXT: minsd %xmm2, %xmm0 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_double_nnan: +; X64AVX: # %bb.0: +; X64AVX-NEXT: vmovq %xmm0, %rax +; X64AVX-NEXT: testq %rax, %rax +; X64AVX-NEXT: js .LBB11_1 +; X64AVX-NEXT: # %bb.2: +; X64AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; X64AVX-NEXT: retq +; X64AVX-NEXT: .LBB11_1: +; X64AVX-NEXT: vmovdqa %xmm0, %xmm2 +; X64AVX-NEXT: vminsd %xmm2, %xmm1, %xmm0 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_double_nnan: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vfpclasssd $5, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovapd %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovsd %xmm0, %xmm2, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vminsd %xmm2, %xmm0, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nnan double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define half @minimumnum_half(half %x, half %y) { +; X64-LABEL: minimumnum_half: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: subq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 80 +; X64-NEXT: .cfi_offset %rbx, -24 +; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jp .LBB12_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: .LBB12_2: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jp .LBB12_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: .LBB12_4: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; 
X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jb .LBB12_6 +; X64-NEXT: # %bb.5: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: .LBB12_6: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: jp .LBB12_7 +; X64-NEXT: # %bb.8: +; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jmp .LBB12_9 +; X64-NEXT: .LBB12_7: +; X64-NEXT: movd {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: .LBB12_9: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: pextrw $0, %xmm0, %ebx +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: pextrw $0, %xmm1, %eax +; X64-NEXT: movzwl %ax, %ecx +; X64-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X64-NEXT: cmovnel %ebx, %eax +; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: pextrw $0, %xmm1, %ebp +; X64-NEXT: movzwl %bp, %ecx +; X64-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X64-NEXT: cmovnel %eax, %ebp +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: pxor %xmm1, %xmm1 +; X64-NEXT: ucomiss %xmm1, %xmm0 +; X64-NEXT: cmovnel %ebx, %ebp +; X64-NEXT: pinsrw $0, %ebp, %xmm0 +; X64-NEXT: addq $56, %rsp +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_half: +; X64AVX: # %bb.0: +; X64AVX-NEXT: pushq %rbp +; X64AVX-NEXT: .cfi_def_cfa_offset 16 +; X64AVX-NEXT: pushq %rbx +; X64AVX-NEXT: .cfi_def_cfa_offset 24 +; X64AVX-NEXT: subq $56, %rsp +; X64AVX-NEXT: .cfi_def_cfa_offset 80 +; X64AVX-NEXT: .cfi_offset %rbx, -24 +; X64AVX-NEXT: .cfi_offset %rbp, -16 +; X64AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss %xmm0, %xmm0 +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jp .LBB12_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB12_2: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss %xmm0, %xmm0 +; X64AVX-NEXT: vmovss 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jp .LBB12_4 +; X64AVX-NEXT: # %bb.3: +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB12_4: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jb .LBB12_6 +; X64AVX-NEXT: # %bb.5: +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB12_6: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss %xmm0, %xmm0 +; X64AVX-NEXT: jp .LBB12_7 +; X64AVX-NEXT: # %bb.8: +; X64AVX-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jmp .LBB12_9 +; X64AVX-NEXT: .LBB12_7: +; X64AVX-NEXT: vmovd {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64AVX-NEXT: .LBB12_9: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vpextrw $0, %xmm0, %ebx +; X64AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64AVX-NEXT: vpextrw $0, %xmm1, %eax +; X64AVX-NEXT: movzwl %ax, %ecx +; X64AVX-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X64AVX-NEXT: cmovnel %ebx, %eax +; X64AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64AVX-NEXT: vpextrw $0, %xmm1, %ebp +; X64AVX-NEXT: movzwl %bp, %ecx +; X64AVX-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X64AVX-NEXT: cmovnel %eax, %ebp +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64AVX-NEXT: vucomiss %xmm1, %xmm0 +; X64AVX-NEXT: cmovnel %ebx, %ebp +; X64AVX-NEXT: vpinsrw $0, %ebp, %xmm0, %xmm0 +; X64AVX-NEXT: addq $56, %rsp +; X64AVX-NEXT: .cfi_def_cfa_offset 24 +; X64AVX-NEXT: popq %rbx +; X64AVX-NEXT: .cfi_def_cfa_offset 16 +; X64AVX-NEXT: popq %rbp +; X64AVX-NEXT: .cfi_def_cfa_offset 8 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_half: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordsh %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsh %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltsh %xmm1, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsh %xmm2, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovsh {{.*#+}} xmm3 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64AVX512FP16-NEXT: vmovsh %xmm3, %xmm0, %xmm2 {%k1} +; 
X64AVX512FP16-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; X64AVX512FP16-NEXT: vcmpeqsh %xmm3, %xmm2, %k1 +; X64AVX512FP16-NEXT: vmovw %xmm0, %eax +; X64AVX512FP16-NEXT: movzwl %ax, %eax +; X64AVX512FP16-NEXT: cmpl $32768, %eax # imm = 0x8000 +; X64AVX512FP16-NEXT: sete %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm3 +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovw %xmm1, %eax +; X64AVX512FP16-NEXT: movzwl %ax, %eax +; X64AVX512FP16-NEXT: cmpl $32768, %eax # imm = 0x8000 +; X64AVX512FP16-NEXT: sete %al +; X64AVX512FP16-NEXT: kmovd %eax, %k2 +; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm3 {%k2} +; X64AVX512FP16-NEXT: vmovsh %xmm3, %xmm0, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovaps %xmm2, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call half @llvm.minimumnum.f16(half %x, half %y) + ret half %z +} + +define half @minimumnum_half_nsz(half %x, half %y) { +; X64-LABEL: minimumnum_half_nsz: +; X64: # %bb.0: +; X64-NEXT: subq $24, %rsp +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jp .LBB13_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: .LBB13_2: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jp .LBB13_4 +; X64-NEXT: # %bb.3: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: .LBB13_4: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jb .LBB13_6 +; X64-NEXT: # %bb.5: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = 
mem[0],zero,zero,zero +; X64-NEXT: .LBB13_6: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss %xmm0, %xmm0 +; X64-NEXT: jp .LBB13_7 +; X64-NEXT: # %bb.8: +; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jmp .LBB13_9 +; X64-NEXT: .LBB13_7: +; X64-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: .LBB13_9: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_half_nsz: +; X64AVX: # %bb.0: +; X64AVX-NEXT: subq $24, %rsp +; X64AVX-NEXT: .cfi_def_cfa_offset 32 +; X64AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss %xmm0, %xmm0 +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jp .LBB13_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB13_2: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss %xmm0, %xmm0 +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jp .LBB13_4 +; X64AVX-NEXT: # %bb.3: +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB13_4: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jb .LBB13_6 +; X64AVX-NEXT: # %bb.5: +; 
X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB13_6: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss %xmm0, %xmm0 +; X64AVX-NEXT: jp .LBB13_7 +; X64AVX-NEXT: # %bb.8: +; X64AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jmp .LBB13_9 +; X64AVX-NEXT: .LBB13_7: +; X64AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; X64AVX-NEXT: .LBB13_9: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: addq $24, %rsp +; X64AVX-NEXT: .cfi_def_cfa_offset 8 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_half_nsz: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vcmpunordsh %xmm0, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsh %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpltsh %xmm1, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vcmpunordsh %xmm1, %xmm1, %k1 +; X64AVX512FP16-NEXT: vmovsh {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nsz half @llvm.minimumnum.f16(half %x, half %y) + ret half %z +} + +define half @minimumnum_half_nnan(half %x, half %y) { +; X64-LABEL: minimumnum_half_nnan: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: subq $48, %rsp +; X64-NEXT: .cfi_def_cfa_offset 80 +; X64-NEXT: .cfi_offset %rbx, -32 +; X64-NEXT: .cfi_offset %r14, -24 +; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: pextrw $0, %xmm1, %ebx +; X64-NEXT: pextrw $0, %xmm0, %ebp +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: jb .LBB14_2 +; X64-NEXT: # %bb.1: +; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64-NEXT: .LBB14_2: +; X64-NEXT: callq __truncsfhf2@PLT +; X64-NEXT: pextrw $0, %xmm0, %r14d +; X64-NEXT: movzwl %bp, %eax +; X64-NEXT: cmpl $32768, %eax # imm = 0x8000 +; X64-NEXT: movl %r14d, %eax +; 
X64-NEXT: cmovel %ebp, %eax +; X64-NEXT: movzwl %bx, %ecx +; X64-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X64-NEXT: cmovnel %eax, %ebx +; X64-NEXT: callq __extendhfsf2@PLT +; X64-NEXT: pxor %xmm1, %xmm1 +; X64-NEXT: ucomiss %xmm1, %xmm0 +; X64-NEXT: cmovnel %r14d, %ebx +; X64-NEXT: pinsrw $0, %ebx, %xmm0 +; X64-NEXT: addq $48, %rsp +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; X64AVX-LABEL: minimumnum_half_nnan: +; X64AVX: # %bb.0: +; X64AVX-NEXT: pushq %rbp +; X64AVX-NEXT: .cfi_def_cfa_offset 16 +; X64AVX-NEXT: pushq %r14 +; X64AVX-NEXT: .cfi_def_cfa_offset 24 +; X64AVX-NEXT: pushq %rbx +; X64AVX-NEXT: .cfi_def_cfa_offset 32 +; X64AVX-NEXT: subq $48, %rsp +; X64AVX-NEXT: .cfi_def_cfa_offset 80 +; X64AVX-NEXT: .cfi_offset %rbx, -32 +; X64AVX-NEXT: .cfi_offset %r14, -24 +; X64AVX-NEXT: .cfi_offset %rbp, -16 +; X64AVX-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64AVX-NEXT: vpextrw $0, %xmm1, %ebx +; X64AVX-NEXT: vpextrw $0, %xmm0, %ebp +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64AVX-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: jb .LBB14_2 +; X64AVX-NEXT: # %bb.1: +; X64AVX-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; X64AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; X64AVX-NEXT: .LBB14_2: +; X64AVX-NEXT: callq __truncsfhf2@PLT +; X64AVX-NEXT: vpextrw $0, %xmm0, %r14d +; X64AVX-NEXT: movzwl %bp, %eax +; X64AVX-NEXT: cmpl $32768, %eax # imm = 0x8000 +; X64AVX-NEXT: movl %r14d, %eax +; X64AVX-NEXT: cmovel %ebp, %eax +; X64AVX-NEXT: movzwl %bx, %ecx +; X64AVX-NEXT: cmpl $32768, %ecx # imm = 0x8000 +; X64AVX-NEXT: cmovnel %eax, %ebx +; X64AVX-NEXT: callq __extendhfsf2@PLT +; X64AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64AVX-NEXT: vucomiss %xmm1, %xmm0 +; X64AVX-NEXT: cmovnel %r14d, %ebx +; X64AVX-NEXT: vpinsrw $0, %ebx, %xmm0, %xmm0 +; X64AVX-NEXT: addq $48, %rsp +; X64AVX-NEXT: .cfi_def_cfa_offset 32 +; X64AVX-NEXT: popq %rbx +; X64AVX-NEXT: .cfi_def_cfa_offset 24 +; X64AVX-NEXT: popq %r14 +; X64AVX-NEXT: .cfi_def_cfa_offset 16 +; X64AVX-NEXT: popq %rbp +; X64AVX-NEXT: .cfi_def_cfa_offset 8 +; X64AVX-NEXT: retq +; +; X64AVX512FP16-LABEL: minimumnum_half_nnan: +; X64AVX512FP16: # %bb.0: +; X64AVX512FP16-NEXT: vfpclasssh $5, %xmm0, %k1 +; X64AVX512FP16-NEXT: vmovaps %xmm1, %xmm2 +; X64AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1} +; X64AVX512FP16-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1} +; X64AVX512FP16-NEXT: vminsh %xmm2, %xmm0, %xmm0 +; X64AVX512FP16-NEXT: retq + %z = call nnan half @llvm.minimumnum.f16(half %x, half %y) + ret half %z +} +
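
Usage note, not part of the patch: a frontend that wants the lowering exercised above would emit these intrinsics through the usual IRBuilder helper. A minimal sketch under that assumption (wrapper names are invented for illustration):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"

// Creates the same llvm.maximumnum/llvm.minimumnum calls the tests above
// check; nnan/nsz variants would typically be produced by setting the
// builder's fast-math flags beforehand (B.setFastMathFlags(...)).
llvm::Value *emitMaximumNum(llvm::IRBuilderBase &B, llvm::Value *X,
                            llvm::Value *Y) {
  return B.CreateBinaryIntrinsic(llvm::Intrinsic::maximumnum, X, Y);
}

llvm::Value *emitMinimumNum(llvm::IRBuilderBase &B, llvm::Value *X,
                            llvm::Value *Y) {
  return B.CreateBinaryIntrinsic(llvm::Intrinsic::minimumnum, X, Y);
}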