SelectionDAG: Use qNaN constant if FCANONICALIZE not LegalOrCustom #104564

Closed · wants to merge 1 commit

Conversation

@wzssyqa (Contributor) commented Aug 16, 2024:

The default Action for ISD::FCANONICALIZE is Legal, while in fact most architectures, X86 included, do not support it.

Let's set the Action for ISD::FCANONICALIZE to Expand on X86, so that we can correctly determine whether it is LegalOrCustom.

@llvmbot added the backend:X86 and llvm:SelectionDAG labels on Aug 16, 2024
@llvmbot (Member) commented Aug 16, 2024:

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: YunQiang Su (wzssyqa)

Changes

The default Action for ISD::FCANONICALIZE is Legal, while in fact most architectures, X86 included, do not support it.

Let's set the Action for ISD::FCANONICALIZE to Expand on X86, so that we can correctly determine whether it is LegalOrCustom.


Patch is 57.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104564.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+10-2)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3)
  • (added) llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll (+1363)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2c939967a5e1d9..7ee28d08d556c3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8622,8 +8622,16 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
   // If MinMax is NaN, let's quiet it.
   if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
       !DAG.isKnownNeverNaN(RHS)) {
-    SDValue MinMaxQuiet =
-        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    SDValue MinMaxQuiet;
+    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
+      MinMaxQuiet = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    } else {
+      // MIPS pre-R5 and HPPA use different encoding of qNaN and sNaN.
+      // ISD::FCANONICALIZE is supported by MIPS.
+      // HPPA is not supported by LLVM yet.
+      MinMaxQuiet =
+          DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
+    }
     MinMax =
         DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 11c9a992cbdee9..e929d98e959715 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -608,6 +608,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FMAXNUM, VT, Action);
     setOperationAction(ISD::FMINIMUM, VT, Action);
     setOperationAction(ISD::FMAXIMUM, VT, Action);
+    setOperationAction(ISD::FCANONICALIZE, VT, Action);
     setOperationAction(ISD::FSIN, VT, Action);
     setOperationAction(ISD::FCOS, VT, Action);
     setOperationAction(ISD::FSINCOS, VT, Action);
@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+
+      setOperationAction(ISD::FCANONICALIZE, VT, Expand);
     }
 
     // Half type will be promoted by default.
diff --git a/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..6dd7e582fae0b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,1363 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=x86_64 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc --mtriple=x86_64 --mattr=+avx < %s | FileCheck %s --check-prefix=X64AVX
+; RUN: llc --mtriple=x86_64 --mattr=+avx512fp16 < %s | FileCheck %s --check-prefix=X64AVX512FP16
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; X64-LABEL: maximumnum_float:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm3
+; X64-NEXT:    cmpunordss %xmm1, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm3
+; X64-NEXT:    orps %xmm0, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    cmpltss %xmm2, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    andps %xmm2, %xmm1
+; X64-NEXT:    andnps %xmm3, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpunordss %xmm0, %xmm1
+; X64-NEXT:    movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm1, %xmm4
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm4
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    cmpeqss %xmm4, %xmm1
+; X64-NEXT:    movd %xmm2, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    je .LBB0_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movaps %xmm4, %xmm2
+; X64-NEXT:  .LBB0_2:
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm4, %xmm0
+; X64-NEXT:    movd %xmm3, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    je .LBB0_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:  .LBB0_4:
+; X64-NEXT:    andps %xmm3, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltss %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovd %xmm2, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    je .LBB0_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB0_2:
+; X64AVX-NEXT:    vcmpeqss %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    je .LBB0_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB0_4:
+; X64AVX-NEXT:    vblendvps %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorps %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovd %xmm0, %eax
+; X64AVX512FP16-NEXT:    testl %eax, %eax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovd %xmm1, %eax
+; X64AVX512FP16-NEXT:    testl %eax, %eax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovaps %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    cmpunordss %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm3
+; X64-NEXT:    andps %xmm2, %xmm3
+; X64-NEXT:    andnps %xmm1, %xmm0
+; X64-NEXT:    orps %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpltss %xmm2, %xmm1
+; X64-NEXT:    andps %xmm1, %xmm2
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm2
+; X64-NEXT:    cmpunordss %xmm1, %xmm2
+; X64-NEXT:    movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm2
+; X64-NEXT:    orps %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltss %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovss {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; X64-LABEL: maximumnum_float_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    js .LBB2_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    jmp .LBB2_3
+; X64-NEXT:  .LBB2_1:
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:  .LBB2_3:
+; X64-NEXT:    maxss %xmm2, %xmm1
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_float_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovd %xmm0, %eax
+; X64AVX-NEXT:    testl %eax, %eax
+; X64AVX-NEXT:    js .LBB2_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vmaxss %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB2_1:
+; X64AVX-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_float_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclassss $3, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovaps %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; X64-LABEL: maximumnum_double:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm3
+; X64-NEXT:    cmpunordsd %xmm1, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm3
+; X64-NEXT:    orpd %xmm0, %xmm3
+; X64-NEXT:    movapd %xmm3, %xmm0
+; X64-NEXT:    cmpltsd %xmm2, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    andpd %xmm2, %xmm1
+; X64-NEXT:    andnpd %xmm3, %xmm0
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpunordsd %xmm0, %xmm1
+; X64-NEXT:    movsd {{.*#+}} xmm4 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm1, %xmm4
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm4
+; X64-NEXT:    xorpd %xmm1, %xmm1
+; X64-NEXT:    cmpeqsd %xmm4, %xmm1
+; X64-NEXT:    movq %xmm2, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    je .LBB3_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movapd %xmm4, %xmm2
+; X64-NEXT:  .LBB3_2:
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    andnpd %xmm4, %xmm0
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    je .LBB3_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:  .LBB3_4:
+; X64-NEXT:    andpd %xmm3, %xmm1
+; X64-NEXT:    orpd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltsd %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvpd %xmm3, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; X64AVX-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX-NEXT:    vmovq %xmm2, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    je .LBB3_2
+; X64AVX-NEXT:  # %bb.1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:  .LBB3_2:
+; X64AVX-NEXT:    vcmpeqsd %xmm3, %xmm1, %xmm3
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    je .LBB3_4
+; X64AVX-NEXT:  # %bb.3:
+; X64AVX-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX-NEXT:  .LBB3_4:
+; X64AVX-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm2, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm2 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
+; X64AVX512FP16-NEXT:    vcmpeqsd %xmm3, %xmm2, %k1
+; X64AVX512FP16-NEXT:    vmovq %xmm0, %rax
+; X64AVX512FP16-NEXT:    testq %rax, %rax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm3
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovq %xmm1, %rax
+; X64AVX512FP16-NEXT:    testq %rax, %rax
+; X64AVX512FP16-NEXT:    sete %al
+; X64AVX512FP16-NEXT:    kmovd %eax, %k2
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm3, %xmm3 {%k2}
+; X64AVX512FP16-NEXT:    vmovsd %xmm3, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovapd %xmm2, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nsz:
+; X64:       # %bb.0:
+; X64-NEXT:    movapd %xmm0, %xmm2
+; X64-NEXT:    cmpunordsd %xmm0, %xmm2
+; X64-NEXT:    movapd %xmm2, %xmm3
+; X64-NEXT:    andpd %xmm1, %xmm3
+; X64-NEXT:    andnpd %xmm0, %xmm2
+; X64-NEXT:    orpd %xmm3, %xmm2
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    cmpunordsd %xmm1, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm3
+; X64-NEXT:    andpd %xmm2, %xmm3
+; X64-NEXT:    andnpd %xmm1, %xmm0
+; X64-NEXT:    orpd %xmm3, %xmm0
+; X64-NEXT:    movapd %xmm0, %xmm1
+; X64-NEXT:    cmpltsd %xmm2, %xmm1
+; X64-NEXT:    andpd %xmm1, %xmm2
+; X64-NEXT:    andnpd %xmm0, %xmm1
+; X64-NEXT:    orpd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm2
+; X64-NEXT:    cmpunordsd %xmm1, %xmm2
+; X64-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
+; X64-NEXT:    andpd %xmm2, %xmm0
+; X64-NEXT:    andnpd %xmm1, %xmm2
+; X64-NEXT:    orpd %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double_nsz:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm1
+; X64AVX-NEXT:    vcmpltsd %xmm0, %xmm1, %xmm2
+; X64AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm1
+; X64AVX-NEXT:    vblendvpd %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nsz:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm0, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpltsd %xmm0, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm1, %xmm1 {%k1}
+; X64AVX512FP16-NEXT:    vcmpunordsd %xmm1, %xmm1, %k1
+; X64AVX512FP16-NEXT:    vmovsd {{.*#+}} xmm1 {%k1} = [NaN,0.0E+0]
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; X64-LABEL: maximumnum_double_nnan:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    js .LBB5_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movdqa %xmm0, %xmm2
+; X64-NEXT:    jmp .LBB5_3
+; X64-NEXT:  .LBB5_1:
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm0, %xmm1
+; X64-NEXT:  .LBB5_3:
+; X64-NEXT:    maxsd %xmm2, %xmm1
+; X64-NEXT:    movapd %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: maximumnum_double_nnan:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vmovq %xmm0, %rax
+; X64AVX-NEXT:    testq %rax, %rax
+; X64AVX-NEXT:    js .LBB5_1
+; X64AVX-NEXT:  # %bb.2:
+; X64AVX-NEXT:    vmaxsd %xmm0, %xmm1, %xmm0
+; X64AVX-NEXT:    retq
+; X64AVX-NEXT:  .LBB5_1:
+; X64AVX-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX-NEXT:    retq
+;
+; X64AVX512FP16-LABEL: maximumnum_double_nnan:
+; X64AVX512FP16:       # %bb.0:
+; X64AVX512FP16-NEXT:    vfpclasssd $3, %xmm0, %k1
+; X64AVX512FP16-NEXT:    vmovapd %xmm1, %xmm2
+; X64AVX512FP16-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
+; X64AVX512FP16-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
+; X64AVX512FP16-NEXT:    vmaxsd %xmm2, %xmm0, %xmm0
+; X64AVX512FP16-NEXT:    retq
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; X64-LABEL: minimumnum_float:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    cmpunordss %xmm0, %xmm2
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:    andps %xmm1, %xmm3
+; X64-NEXT:    andnps %xmm0, %xmm2
+; X64-NEXT:    orps %xmm3, %xmm2
+; X64-NEXT:    movaps %xmm1, %xmm3
+; X64-NEXT:    cmpunordss %xmm1, %xmm3
+; X64-NEXT:    movaps %xmm3, %xmm0
+; X64-NEXT:    andps %xmm2, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm3
+; X64-NEXT:    orps %xmm0, %xmm3
+; X64-NEXT:    movaps %xmm2, %xmm0
+; X64-NEXT:    cmpltss %xmm3, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    andps %xmm2, %xmm1
+; X64-NEXT:    andnps %xmm3, %xmm0
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    cmpunordss %xmm0, %xmm1
+; X64-NEXT:    movss {{.*#+}} xmm4 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT:    andps %xmm1, %xmm4
+; X64-NEXT:    andnps %xmm0, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm4
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    cmpeqss %xmm4, %xmm1
+; X64-NEXT:    movd %xmm2, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    jo .LBB6_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    movaps %xmm4, %xmm2
+; X64-NEXT:  .LBB6_2:
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm4, %xmm0
+; X64-NEXT:    movd %xmm3, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    jo .LBB6_4
+; X64-NEXT:  # %bb.3:
+; X64-NEXT:    movaps %xmm2, %xmm3
+; X64-NEXT:  .LBB6_4:
+; X64-NEXT:    andps %xmm3, %xmm1
+; X64-NEXT:    orps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
+; X64AVX-LABEL: minimumnum_float:
+; X64AVX:       # %bb.0:
+; X64AVX-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
+; X64AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm2
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm0
+; X64AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; X64AVX-NEXT:    vcmpltss %xmm0, %xmm2, %xmm1
+; X64AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm1
+; X64AVX-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm3
+; X64AVX-NEXT:    vblendvps %xmm3, {{\.?LCPI[0-9]+_[0-9]+...
[truncated]

@arsenm (Contributor) left a comment:

What does the expand action even do? We can't implement this in terms of any generic operations. Targets have to handle it somehow.

-    SDValue MinMaxQuiet =
-        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    SDValue MinMaxQuiet;
+    if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
@arsenm (Contributor) commented Aug 16, 2024:

This is a different change from the default-Action change, and I also don't think it should be done. The correct operation is a canonicalize; this just drops payload bits for no reason.

@arsenm (Contributor) replied:

Oh, this is just throwing away the value altogether. We don't know this is a NaN.

@wzssyqa (Author) replied:

We only use it if MinMax is unordered with itself (ISD::SETUO), i.e. if it is NaN.

@wzssyqa (Author) replied:

> This is a different change from the default-Action change, and I also don't think it should be done. The correct operation is a canonicalize; this just drops payload bits for no reason.

This is only used if FCANONICALIZE is not available.

@arsenm (Contributor) replied:

But a NaN constant isn't a replacement for a canonicalize. You are breaking any non-NaN inputs.

@wzssyqa (Author) replied:

> But a NaN constant isn't a replacement for a canonicalize. You are breaking any non-NaN inputs.

It won't break non-NaN inputs, as we have:

MinMax =
         DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);

MinMaxQuiet is used only if MinMax is unordered with itself (MinMax UO MinMax), i.e. only when MinMax is NaN.
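
For readers unfamiliar with unordered compares, here is a scalar model of what that getSelectCC computes (a plain-C++ illustration, not LLVM code): an unordered self-compare is true exactly when the value is NaN.

#include <cmath>
#include <limits>

// Scalar model of DAG.getSelectCC(..., ISD::SETUO): the qNaN constant is
// selected only when MinMax compares unordered with itself, i.e. is NaN;
// any non-NaN value passes through unchanged.
float quiet_step(float MinMax) {
  float MinMaxQuiet = std::numeric_limits<float>::quiet_NaN();
  return std::isunordered(MinMax, MinMax) ? MinMaxQuiet : MinMax;
}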

@wzssyqa (Author) replied:

> You also can't do a signaling NaN check with fcmp in strictfp functions. This cannot raise an exception.

Thanks.

@wzssyqa (Author) replied:

> You also can't do a signaling NaN check with fcmp in strictfp functions. This cannot raise an exception.

I don't think these expandFXXXX helpers can meet the requirements of strictfp.
I guess only architectures that have exactly matching instructions can do so.
Or maybe we can only fall back to soft float.

@arsenm (Contributor) replied:

But in the canonicalize case you can just use the canonicalize result; you don't need the additional compare and select.
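
A minimal sketch of the simplification being suggested, reusing names from the patch above (the exact shape is an assumption, not code from this PR): when FCANONICALIZE is available, its result is already quiet, so the SETUO select is only needed on the fallback path.

// If the target supports canonicalize, it already quiets a NaN result.
if (isOperationLegalOrCustom(ISD::FCANONICALIZE, VT)) {
  MinMax = DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
} else {
  // Fallback: replace a NaN result with a fresh qNaN via an unordered select.
  SDValue MinMaxQuiet =
      DAG.getConstantFP(APFloat::getQNaN(VT.getFltSemantics()), DL, VT);
  MinMax =
      DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
}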

@wzssyqa (Author) replied:

OK. So let's close this PR.

@pawan-nirpal-031 (Contributor) commented Aug 16, 2024:

Hi all, I have a patch implementing the fcanonicalize intrinsic for the X86 backend. It handles fp80, bf16, f16, f32, and f64 inputs, as well as denormals and other subnormals, along the lines of 9cd9071, and it canonicalizes variables by multiplying by 1.0. Are the goals of this PR along the same lines? If so, it may not be necessary for me to submit another PR; if not, let me know and I will submit mine. Thanks in advance.

@RKSimon (Collaborator) commented Aug 16, 2024:

@pawan-nirpal-031 Can we just use mul by 1.0 as the default expansion for ISD::FCANONICALIZE, adding a TargetLowering::expandFCANONICALIZE method?

@pawan-nirpal-031 (Contributor) commented Aug 16, 2024:

> @pawan-nirpal-031 Can we just use mul by 1.0 as the default expansion for ISD::FCANONICALIZE, adding a TargetLowering::expandFCANONICALIZE method?

Hi @RKSimon, yes, we could. I was doing this in instruction selection, and canonicalizing subnormals in the legalizer phase (FTZ and so on), replacing sNaNs with qNaNs and undefs with qNaNs. I had plans to handle these cases along with the mul by 1.0 too. But I wanted to know whether this PR does that; if it does, my PR may not be required. My PR aims to handle this intrinsic for constants, variables, and subnormals, for all scalar (for now) float types supported by X86. The idea is to have a standardized lowering for all kinds of values.
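
As a concrete reference for the rules listed above, here is a bit-level scalar model of binary32 canonicalization with flush-to-zero (an illustrative sketch under assumed semantics, not code from pawan-nirpal-031's patch):

#include <bit>
#include <cstdint>

// Canonicalization rules modeled: flush subnormals to signed zero (FTZ)
// and quiet any signaling NaN by setting the quiet bit (bit 22).
float canonicalize_f32_ftz(float x) {
  uint32_t bits = std::bit_cast<uint32_t>(x);
  uint32_t exp = bits & 0x7f800000u;
  uint32_t man = bits & 0x007fffffu;
  if (exp == 0 && man != 0)             // subnormal -> signed zero
    return std::bit_cast<float>(bits & 0x80000000u);
  if (exp == 0x7f800000u && man != 0)   // NaN -> quiet NaN
    return std::bit_cast<float>(bits | 0x00400000u);
  return x;                             // normals, zeros, infinities
}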

@arsenm (Contributor) commented Aug 16, 2024:

> @pawan-nirpal-031 Can we just use mul by 1.0 as the default expansion for ISD::FCANONICALIZE, adding a TargetLowering::expandFCANONICALIZE method?

You cannot do that. A regular fmul does not have the same restrictions as fcanonicalize. A later canonicalize-dropping combine could lose the required semantics.
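
To illustrate the hazard: any generic identity fold of roughly this shape (a hypothetical combine written for illustration, not actual LLVM code) would silently erase a canonicalize that had been lowered to a plain multiply.

// Folding fmul x, 1.0 -> x is valid for an ordinary multiply, but if the
// fmul was standing in for fcanonicalize, the fold drops the sNaN-quieting
// and denormal-flushing behavior the node existed to provide.
if (N->getOpcode() == ISD::FMUL)
  if (auto *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->isExactlyValue(1.0))
      return N->getOperand(0);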

+      // MIPS pre-R5 and HPPA use different encoding of qNaN and sNaN.
+      // ISD::FCANONICALIZE is supported by MIPS.
+      // HPPA is not supported by LLVM yet.
+      MinMaxQuiet =
@arsenm (Contributor) commented:

A make_quiet would need to be is_signaling(x) ? qnan : x, or preferably is_signaling(x) ? make_quiet(x) : x. But this should really just use the canonicalize, not work around it not being implemented here.
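
For reference, a bit-level sketch of the is_signaling predicate for IEEE-754 binary32 (an illustration; inside LLVM one would use APFloat's own classification instead):

#include <bit>
#include <cstdint>

// A binary32 NaN has all exponent bits set and a nonzero mantissa; it is
// signaling when the top mantissa bit (the quiet bit, bit 22) is clear.
bool is_signaling(float x) {
  uint32_t bits = std::bit_cast<uint32_t>(x);
  bool is_nan =
      (bits & 0x7f800000u) == 0x7f800000u && (bits & 0x007fffffu) != 0;
  return is_nan && (bits & 0x00400000u) == 0;
}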

@wzssyqa (Author) replied:

I am not trying to implement make_quiet here, as it is not easy to do is_signaling.
What I am trying to do is isNaN(x) ? qnan : x.

@arsenm (Contributor) replied:

The lack of support for canonicalize on a target is no reason to have this code here; targets need to implement operations. If you really want to hack around this, it needs to be in a dedicated make-quiet helper function, not directly used in this one context.

@@ -668,6 +669,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+
+      setOperationAction(ISD::FCANONICALIZE, VT, Expand);
@arsenm (Contributor) commented:

This is just a workaround with no actual expand implementation.

@pawan-nirpal-031 (Contributor) replied:

Okay, I figured it might be so, so I take it I should continue to prepare my patch. I plan to implement it similarly to your AMDGPU implementation.

@pawan-nirpal-031 (Contributor) commented:

Hi @arsenm, can you clarify my query? Is this PR trying to achieve the same goal as mine? If not, I can keep preparing my patch, which I plan to submit in a few days.

@arsenm (Contributor) commented Aug 16, 2024:

> Hi @arsenm, can you clarify my query? Is this PR trying to achieve the same goal as mine? If not, I can keep preparing my patch, which I plan to submit in a few days.

It is not. Your PR would be a prerequisite for this one to be usable on X86. X86 (and all targets) need to implement canonicalize, which cannot be done in terms of other generic floating-point ops. It needs to be directly selected (or we need to rewrite the rules for DAG nodes to be different from IR, audit all the combines, and then suffer from having different rules from the IR).

@pawan-nirpal-031 (Contributor) replied:

> Hi @arsenm, can you clarify my query? Is this PR trying to achieve the same goal as mine? If not, I can keep preparing my patch, which I plan to submit in a few days.
>
> It is not. Your PR would be a prerequisite for this one to be usable on X86. X86 (and all targets) need to implement canonicalize, which cannot be done in terms of other generic floating-point ops. It needs to be directly selected (or we need to rewrite the rules for DAG nodes to be different from IR, audit all the combines, and then suffer from having different rules from the IR).

Thanks, my patch is under internal review; I will raise an llorg PR in a few days.

@RKSimon (Collaborator) commented Aug 19, 2024:

> @pawan-nirpal-031 Can we just use mul by 1.0 as the default expansion for ISD::FCANONICALIZE, adding a TargetLowering::expandFCANONICALIZE method?
>
> You cannot do that. A regular fmul does not have the same restrictions as fcanonicalize. A later canonicalize-dropping combine could lose the required semantics.

So we'd need ISD::STRICT_FMUL, or is that still not enough?

@arsenm (Contributor) commented Aug 19, 2024:

> You cannot do that. A regular fmul does not have the same restrictions as fcanonicalize. A later canonicalize-dropping combine could lose the required semantics.
>
> So we'd need ISD::STRICT_FMUL, or is that still not enough?

That depends on how acceptable it is to introduce strict FP nodes in a non-strict function. The IR enforces that this is not OK, but the DAG is more permissive (not necessarily by design?). In principle it should work.

@wzssyqa (Author) commented Aug 19, 2024:

> Hi all, I have a patch implementing the fcanonicalize intrinsic for the X86 backend. It handles fp80, bf16, f16, f32, and f64 inputs, as well as denormals and other subnormals, along the lines of 9cd9071, and it canonicalizes variables by multiplying by 1.0. Are the goals of this PR along the same lines? If so, it may not be necessary for me to submit another PR; if not, let me know and I will submit mine. Thanks in advance.

For me, I'd prefer it as a separate patch. If it is accepted, I will be very happy to use it.

@pawan-nirpal-031 (Contributor) replied:

> Hi all, I have a patch implementing the fcanonicalize intrinsic for the X86 backend. It handles fp80, bf16, f16, f32, and f64 inputs, as well as denormals and other subnormals, along the lines of 9cd9071, and it canonicalizes variables by multiplying by 1.0. Are the goals of this PR along the same lines? If so, it may not be necessary for me to submit another PR; if not, let me know and I will submit mine. Thanks in advance.
>
> For me, I'd prefer it as a separate patch. If it is accepted, I will be very happy to use it.

Hi, yes. My patch is ready and under internal review; once I get an LGTM, I will raise the llorg PR. Expect it in the coming days.

@pawan-nirpal-031 (Contributor) commented:

@arsenm, how does the AMDGPU backend prevent the mul by 1.0 from being optimized away?

@arsenm (Contributor) commented Aug 20, 2024:

> @arsenm, how does the AMDGPU backend prevent the mul by 1.0 from being optimized away?

It doesn't. This is why the canonicalize intrinsic exists. We just directly select it.

@pawan-nirpal-031 (Contributor) commented:

FYR: #106370

@wzssyqa (Author) commented Sep 4, 2024:

@arsenm, is it acceptable if we don't require keeping the payload?

@arsenm (Contributor) commented Sep 4, 2024:

> @arsenm, is it acceptable if we don't require keeping the payload?

Yes. Payload preservation is a nice-to-have, not a requirement.
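
To make the payload question concrete, a bit-level illustration with assumed example values (binary32 only): selecting a fresh qNaN constant, as this PR does, replaces the whole NaN, while a payload-preserving make_quiet would only set the quiet bit.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t snan = 0x7f800001u;       // sNaN carrying payload 0x1
  // This PR's approach: a fresh default qNaN; the payload is gone.
  uint32_t fresh_qnan = 0x7fc00000u;
  // Payload-preserving quieting: only set the quiet bit (bit 22).
  uint32_t quieted = snan | 0x00400000u; // 0x7fc00001, payload kept
  std::printf("fresh: %08x  preserved: %08x\n",
              (unsigned)fresh_qnan, (unsigned)quieted);
  return 0;
}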

@wzssyqa closed this on Sep 5, 2024.
Labels: backend:X86, llvm:SelectionDAG