From a824dede98e9a979dd432d0a72b01ad730474245 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Wed, 28 Aug 2024 13:09:30 +0200 Subject: [PATCH 01/10] [X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic Enable support for fcanonicalize intrinsic lowering. --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 +++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 46 +++ .../CodeGen/X86/canonicalize-constants.ll | 210 +++++++++++++ .../CodeGen/X86/canonicalize-subnormals.ll | 287 ++++++++++++++++++ llvm/test/CodeGen/X86/canonicalize-vars.ll | 193 ++++++++++++ 5 files changed, 786 insertions(+) create mode 100644 llvm/test/CodeGen/X86/canonicalize-constants.ll create mode 100644 llvm/test/CodeGen/X86/canonicalize-subnormals.ll create mode 100644 llvm/test/CodeGen/X86/canonicalize-vars.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 74e3a898569be..c1679b1002df5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1275,6 +1275,56 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } } break; + case ISD::FCANONICALIZE: { + const Triple &TT = DAG.getTarget().getTargetTriple(); + if (TT.getArch() == Triple::x86 || TT.getArch() == Triple::x86_64) { + SDValue Operand = Node->getOperand(0); + SDLoc dl(Node); + EVT VT = Operand.getValueType(); + + if (ConstantFPSDNode *CFP = dyn_cast(Operand)) { + const APFloat &C = CFP->getValueAPF(); + if (C.isDenormal()) { + DenormalMode Mode = + DAG.getMachineFunction().getDenormalMode(C.getSemantics()); + assert((Mode != DenormalMode::getPositiveZero()) && + "Positive denormal mode is not valid for X86 target."); + if (Mode == DenormalMode::getPreserveSign()) { + SDValue SDZero = + DAG.getConstantFP((C.isNegative() ? 
-0.0 : 0.0), dl, VT); + ConstantFPSDNode *ZeroConstFP = cast(SDZero); + SDValue CanonZeroFPLoad = ExpandConstantFP(ZeroConstFP, true); + DAG.ReplaceAllUsesWith(Node, CanonZeroFPLoad.getNode()); + LLVM_DEBUG(dbgs() + << "Legalized Denormal under mode PreserveSign\n"); + return; + } else if (Mode == DenormalMode::getIEEE()) { + DAG.ReplaceAllUsesWith(Node, Operand.getNode()); + LLVM_DEBUG(dbgs() << "Legalized Denormal under mode IEEE\n"); + return; + } + } else if (C.isNaN() && C.isSignaling()) { + APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics()); + SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); + ConstantFPSDNode *QNaNConstFP = cast(QuitNaN); + SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true); + DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode()); + LLVM_DEBUG(dbgs() << "Legalized Signaling NaN to Quiet NaN\n"); + return; + } + } else if (Operand.isUndef()) { + APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); + SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); + ConstantFPSDNode *QNaNConstFP = cast(QuitNaN); + SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true); + DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode()); + LLVM_DEBUG(dbgs() << "Legalized Undef to Quiet NaN\n"); + return; + } + break; + } + break; + } case ISD::FSHL: case ISD::FSHR: case ISD::SRL_PARTS: diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index d0a54ab8993c2..4bb8c9afd23ed 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5271,6 +5271,52 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } break; } + case ISD::FCANONICALIZE: { + SDValue Operand = Node->getOperand(0); + EVT VT = Node->getValueType(0); + + // Perform canonicalization for constants. Replace the operand by a load + // from constant pool for this constant. At this point subnoraml values like + // denormals, snans have been canonicalized so no need to deal with those + // cases. 
+ if (LoadSDNode *Load = dyn_cast(Operand)) { + const X86TargetLowering *X86Lowering = + static_cast(TLI); + if (const Constant *CV = X86Lowering->getTargetConstantFromLoad(Load)) { + const ConstantFP *CFP = dyn_cast(CV); + if (CFP) { + ReplaceNode(Node, Load); + return; + } + } + } + + // Canonicalize normal non-constant/non-undef FP Nodes. + SDValue MulNode; + SDValue One; + if (VT == MVT::f32 || VT == MVT::f64) { + One = CurDAG->getConstantFP(1.0f, dl, VT); + } else if (VT == MVT::f80) { + APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended()); + One = CurDAG->getConstantFP(Val, dl, VT); + } else if (VT == MVT::f16) { + APFloat Val(APFloat::IEEEhalf(), "1.0"); + One = CurDAG->getConstantFP(Val, dl, VT); + } else if (VT == MVT::bf16) { + APFloat Val(APFloat::BFloat(), "1.0"); + One = CurDAG->getConstantFP(Val, dl, VT); + } else { + // Is it better to assert? when we encounter an unknown FP type,Than to + // just replace with the operand! As this might be our last attempt at + // legalization. + ReplaceNode(Node, Operand.getNode()); + return; + } + // TODO : Follow-up with tablegen pattern to generate mul * 1.0. 
+ MulNode = CurDAG->getNode(ISD::FMUL, dl, VT, Operand, One); + ReplaceNode(Node, MulNode.getNode()); + return; + } case ISD::BRIND: case X86ISD::NT_BRIND: { if (Subtarget->isTargetNaCl()) diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll new file mode 100644 index 0000000000000..b71c74bcd4472 --- /dev/null +++ b/llvm/test/CodeGen/X86/canonicalize-constants.ll @@ -0,0 +1,210 @@ +; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s + +define float @canon_fp32() { + ; CHECK-LABEL: .LCPI0_0: + ; CHECK: .long 0x40400000 # float 3 + ; CHECK-LABEL: canon_fp32 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] + ; CHECK-NEXT: retq + %canonicalized = call float @llvm.canonicalize.f32(float 3.0) + ret float %canonicalized +} + +define half @canon_fp16() { + ; CHECK-LABEL: .LCPI1_0: + ; CHECK: .short 0x4200 # half 3 + ; CHECK-LABEL: canon_fp16 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsh .LCPI1_0(%rip), %xmm0 + ; CHECK-NEXT: retq + %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0 + ret half %canonicalized +} + +define double @canon_fp64() { + ; CHECK-LABEL: .LCPI2_0: + ; CHECK: .quad 0x4008000000000000 # double 3 + ; CHECK-LABEL: canon_fp64 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0 + ; CHECK-NEXT: retq + %canonicalized = call double @llvm.canonicalize.f64(double 3.0) + ret double %canonicalized +} + +define x86_fp80 @canon_fp80() { + ; CHECK-LABEL: .LCPI3_0: + ; CHECK: .long 0x42b40000 # float 90 + ; CHECK-LABEL: canon_fp80 + ; CHECK: # %bb.0: + ; CHECK-NEXT: flds .LCPI3_0(%rip) + ; CHECK-NEXT: retq + + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0 + ret x86_fp80 %canonicalized +} + + +define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) { +entry: + ; CHECK-LABEL: .LCPI4_0: + ; CHECK: .long 0x42b40000 # float 90 + ; CHECK-LABEL: 
complex_canonicalize_x86_fp80 + ; CHECK: # %bb.0: + ; CHECK-NEXT: fldt 24(%rsp) + ; CHECK-NEXT: flds .LCPI4_0(%rip) + ; CHECK-NEXT: fsubp %st, %st(1) + ; CHECK-NEXT: retq + + %mul1 = fsub x86_fp80 %a, %b + %add = fadd x86_fp80 %mul1, %b + %mul2 = fsub x86_fp80 %add, %mul1 + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) + %result = fsub x86_fp80 %canonicalized, %b + ret x86_fp80 %result +} + +define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 { +start: + ; CHECK-LABEL: .LCPI5_0: + ; CHECK: .quad 0x4008000000000000 # double 3 + ; CHECK-LABEL: complex_canonicalize_fp64 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0 + ; CHECK-NEXT: retq + + %c = fcmp olt double %a, %b + %d = fcmp uno double %a, 0.000000e+00 + %or.cond.i.i = or i1 %d, %c + %e = select i1 %or.cond.i.i, double %b, double %a + %f = tail call double @llvm.canonicalize.f64(double 3.0) #2 + ret double %f +} + +define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 { + ; CHECK-LAEBL: test_fold_canonicalize_p0_f32 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 + ; CHECK-NEXT: vmovss %xmm0, (%rdi) + ; CHECK-NEXT: retq + %canonicalized = call float @llvm.canonicalize.f32(float 0.0) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 { + ; CHECK-LAEBL: .LCPI7_0: + ; CHECK: .long 0x80000000 # float -0 + ; CHECK-LAEBL: test_fold_canonicalize_n0_f32 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovss .LCPI7_0(%rip), %xmm0 + ; CHECK-NEXT: vmovss %xmm0, (%rdi) + ; CHECK-NEXT: retq + %canonicalized = call float @llvm.canonicalize.f32(float -0.0) + store float %canonicalized, float addrspace(1)* %out + ret void +} + + +define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { + ; CHECK-LAEBL: .LCPI8_0: + ; CHECK: .long 0x42b40000 # float 90 + ; CHECK-LAEBL: v_test_canonicalize_p90_x86_fp80 + ; 
CHECK: # %bb.0: + ; CHECK-NEXT: flds .LCPI8_0(%rip) + ; CHECK-NEXT: fstpt (%rdi) + ; CHECK-NEXT: retq + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) + store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI9_0: + ; CHECK: .short 0x4200 # half 3 + ; CHECK-LABEL: v_test_canonicalize_p3__half: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: vmovsh .LCPI9_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsh %xmm0, (%rdi) + ; CHECK-NEXT: retq + +entry: + %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) + store half %canonicalized, half addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 { + ; CHECK-LABEL: .LCPI10_0: + ; CHECK: .quad 0x4008000000000000 # double 3 + ; CHECK-LAEBL: v_test_canonicalize_p3_f64 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI10_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq +entry: + %canonicalized = call double @llvm.canonicalize.f64(double 3.0) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI11_0: + ; CHECK: .long 0x40400000 # float 3 + ; CHECK-LABEL: v_test_canonicalize_p3__bfloat: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] + ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 + ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) + ; CHECK-NEXT: retq + +entry: + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0) + store bfloat %canonicalized, bfloat addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI12_0: + ; CHECK: .long 0xc0400000 # float -3 + ; CHECK-LABEL: v_test_canonicalize_n3__bfloat: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: vmovss 
.LCPI12_0(%rip), %xmm0 # xmm0 = [-3.0E+0,0.0E+0,0.0E+0,0.0E+0] + ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 + ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) + ; CHECK-NEXT: retq + +entry: + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0) + store bfloat %canonicalized, bfloat addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { + ; CHECK-LAEBL: .LCPI13_0: + ; CHECK: .long 0xc2b40000 # float -90 + ; CHECK-LAEBL: v_test_canonicalize_n90_x86_fp80 + ; CHECK: # %bb.0: + ; CHECK-NEXT: flds .LCPI13_0(%rip) + ; CHECK-NEXT: fstpt (%rdi) + ; CHECK-NEXT: retq + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000) + store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI14_0: + ; CHECK: .short 0xc200 # half -3 + ; CHECK-LABEL: v_test_canonicalize_n3__half: + ; CHECK: # %bb.0: # %entry + ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsh %xmm0, (%rdi) + ; CHECK-NEXT: retq + +entry: + %canonicalized = call half @llvm.canonicalize.f16(half 0xHC200) + store half %canonicalized, half addrspace(1)* %out + ret void +} \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll new file mode 100644 index 0000000000000..8e7e04c2a67dc --- /dev/null +++ b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll @@ -0,0 +1,287 @@ +; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck %s +; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL %s +; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL %s + +define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI0_0: + ; CHECK: .long 
0x80000000 # float -0 + ; CHECK-LABEL: canonicalize_denormal1_f32_pre_sign: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0 + ; CHECK-NEXT: vmovss %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI1_0: + ; CHECK: .quad 0x8000000000000000 # double -0 + ; CHECK-LABEL: canonicalize_denormal1_f64_pre_sign: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI1_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} + + +define void @canonicalize_qnan_f64(double addrspace(1)* %out) { + ;cCHECK-LABEL: .LCPI2_0: + ;cCHECK: .quad 0x7ff8000000000000 # double NaN + ; CHECK-LABEL: canonicalize_qnan_f64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) { + ;cCHECK-LABEL: .LCPI3_0: + ;cCHECK: .quad 0xffffffffffffffff # double NaN + ; CHECK-LABEL: canonicalize_qnan_value_neg1_f64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI3_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI4_0: + ; CHECK: .quad 0xfffffffffffffffe # double NaN + ; CHECK-LABEL: 
canonicalize_qnan_value_neg2_f64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI4_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI5_0: + ; CHECK: .quad 0x7ff8000000000000 # double NaN + ; CHECK-LABEL: canonicalize_snan0_value_f64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_undef(double addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI6_0: + ; CHECK: .quad 0x7ff8000000000000 # double NaN + ; CHECK-LABEL: canonicalize_undef: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd .LCPI6_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double undef) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) { + ; IEEE-DENORMAL-LABEL: .LCPI7_0: + ; IEEE-DENORMAL: .long 0x807fffff # float -1.17549421E-38 + ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f32_ieee: + ; IEEE-DENORMAL: # %bb.0: + ; IEEE-DENORMAL-NEXT: vmovss .LCPI7_0(%rip), %xmm0 + ; IEEE-DENORMAL-NEXT: vmovss %xmm0, (%rdi) + ; IEEE-DENORMAL-NEXT: retq + + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) { + ; IEEE-DENORMAL-LABEL: .LCPI8_0: + ; IEEE-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308 + 
; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f64_ieee: + ; IEEE-DENORMAL: # %bb.0: + ; IEEE-DENORMAL-NEXT: vmovsd .LCPI8_0(%rip), %xmm0 + ; IEEE-DENORMAL-NEXT: vmovsd %xmm0, (%rdi) + ; IEEE-DENORMAL-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) { + ; DYN-DENORMAL-LABEL: .LCPI9_0: + ; DYN-DENORMAL: .long 0x807fffff # float -1.17549421E-38 + ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f32_dynamic: + ; DYN-DENORMAL: # %bb.0: + ; DYN-DENORMAL-NEXT: vmovss .LCPI9_0(%rip), %xmm0 + ; DYN-DENORMAL-NEXT: vmovss %xmm0, (%rdi) + ; DYN-DENORMAL-NEXT: retq + + %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) { + ; DYN-DENORMAL-LABEL: .LCPI10_0: + ; DYN-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308 + ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f64_dynamic: + ; DYN-DENORMAL: # %bb.0: + ; DYN-DENORMAL-NEXT: vmovsd .LCPI10_0(%rip), %xmm0 + ; DYN-DENORMAL-NEXT: vmovsd %xmm0, (%rdi) + ; DYN-DENORMAL-NEXT: retq + + %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI11_0: + ; CHECK: .long 0x80000000 # float -0 + ; CHECK-LABEL: canonicalize_denormal1_bfloat_pre_sign: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0 + ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 + ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) + 
store bfloat %canonicalized, bfloat addrspace(1)* %out + ret void +} + + +define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) { + ; IEEE-DENORMAL-LABEL: .LCPI12_0: + ; IEEE-DENORMAL: .long 0x80000000 # float -0 + ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_bfloat_ieee: + ; IEEE-DENORMAL: # %bb.0: + ; IEEE-DENORMAL-NEXT: vmovss .LCPI12_0(%rip), %xmm0 + ; IEEE-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0 + ; IEEE-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi) + ; IEEE-DENORMAL-NEXT: retq + + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) + store bfloat %canonicalized, bfloat addrspace(1)* %out + ret void +} + + +define void @canonicalize_denormal1_bfloat_dynamic(bfloat addrspace(1)* %out) { + ; DYN-DENORMAL-LABEL: .LCPI13_0: + ; DYN-DENORMAL: .long 0x80000000 # float -0 + ; DYN-DENORMAL-LABEL: canonicalize_denormal1_bfloat_dynamic: + ; DYN-DENORMAL: # %bb.0: + ; DYN-DENORMAL-NEXT: vmovss .LCPI13_0(%rip), %xmm0 + ; DYN-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0 + ; DYN-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi) + ; DYN-DENORMAL-NEXT: retq + + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) + store bfloat %canonicalized, bfloat addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) { + ; CHECK-LABEL: .LCPI14_0: + ; CHECK: .short 0x8000 # half -0 + ; CHECK-LABEL: canonicalize_denormal1_half_pre_sign: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0 + ; CHECK-NEXT: vmovsh %xmm0, (%rdi) + ; CHECK-NEXT: retq + + %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) + store half %canonicalized, half addrspace(1)* %out + ret void +} + + +define void @canonicalize_denormal1_half_ieee(half addrspace(1)* %out) { + ; IEEE-DENORMAL-LABEL: .LCPI15_0: + ; IEEE-DENORMAL: .short 0x8000 # half -0 + ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_half_ieee: + ; IEEE-DENORMAL: # %bb.0: + ; 
IEEE-DENORMAL-NEXT: vmovsh .LCPI15_0(%rip), %xmm0 + ; IEEE-DENORMAL-NEXT: vmovsh %xmm0, (%rdi) + ; IEEE-DENORMAL-NEXT: retq + + %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) + store half %canonicalized, half addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_half_dynamic(half addrspace(1)* %out) { + ; DYN-DENORMAL-LABEL: .LCPI16_0: + ; DYN-DENORMAL: .short 0x8000 # half -0 + ; DYN-DENORMAL-LABEL: canonicalize_denormal1_half_dynamic: + ; DYN-DENORMAL: # %bb.0: + ; DYN-DENORMAL-NEXT: vmovsh .LCPI16_0(%rip), %xmm0 + ; DYN-DENORMAL-NEXT: vmovsh %xmm0, (%rdi) + ; DYN-DENORMAL-NEXT: retq + + %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) + store half %canonicalized, half addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_x86_fp80_pre_sign(x86_fp80 addrspace(1)* %out) { + ; CHECK-LAEBL: .LCPI17_0: + ; CHECK: .long 0x00000000 # float 0 + ; CHECK-LAEBL: canonicalize_denormal1_x86_fp80_pre_sign + ; CHECK: # %bb.0: + ; CHECK-NEXT: flds .LCPI17_0(%rip) + ; CHECK-NEXT: fstpt (%rdi) + ; CHECK-NEXT: retq + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) + store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_x86_fp80_dynamic(x86_fp80 addrspace(1)* %out) { + ; DYN-DENORMAL-LAEBL: .LCPI17_0: + ; DYN-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951 + ; DYN-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_dynamic + ; DYN-DENORMAL: # %bb.0: + ; DYN-DENORMAL-NEXT: fldt .LCPI17_0(%rip) + ; DYN-DENORMAL-NEXT: fstpt (%rdi) + ; DYN-DENORMAL-NEXT: retq + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) + store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out + ret void +} + +define void @canonicalize_denormal1_x86_fp80_ieee(x86_fp80 addrspace(1)* %out) { + ; IEEE-DENORMAL-LAEBL: .LCPI17_0: + ; IEEE-DENORMAL: 
.quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951 + ; IEEE-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_ieee + ; IEEE-DENORMAL: # %bb.0: + ; IEEE-DENORMAL-NEXT: fldt .LCPI17_0(%rip) + ; IEEE-DENORMAL-NEXT: fstpt (%rdi) + ; IEEE-DENORMAL-NEXT: retq + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) + store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out + ret void +} \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll new file mode 100644 index 0000000000000..c1b5dd0dddcd2 --- /dev/null +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -0,0 +1,193 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 +; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s + +define half @complex_canonicalize_fmul_half(half %a, half %b) { +; CHECK-LABEL: complex_canonicalize_fmul_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2 +; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq +entry: + + %mul1 = fsub half %a, %b + %add = fadd half %mul1, %b + %mul2 = fsub half %add, %mul1 + %canonicalized = call half @llvm.canonicalize.f16(half %mul2) + %result = fsub half %canonicalized, %b + ret half %result +} + +define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { +entry: + ; CHECK-LABEL: complex_canonicalize_fmul_x86_fp80 + ; CHECK: # %bb.0: + ; CHECK-NEXT: fldt 24(%rsp) + ; CHECK-NEXT: fldt 8(%rsp) + ; CHECK-NEXT: fsub %st(1), %st + ; CHECK-NEXT: fld %st(0) + ; CHECK-NEXT: fadd %st(2), %st + ; CHECK-NEXT: fsubp %st, %st(1) + ; CHECK-NEXT: fsubp %st, %st(1) + ; CHECK-NEXT: retq + + %mul1 = fsub x86_fp80 %a, %b + %add = fadd x86_fp80 %mul1, %b + %mul2 = fsub x86_fp80 %add, %mul1 + %canonicalized = call 
x86_fp80 @llvm.canonicalize.f80(x86_fp80 %mul2) + %result = fsub x86_fp80 %canonicalized, %b + ret x86_fp80 %result +} + +define bfloat @complex_canonicalize_fmul_bfloat(bfloat %a, bfloat %b) { +; CHECK-LABEL: complex_canonicalize_fmul_bfloat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovw %xmm0, %eax +; CHECK-NEXT: vmovw %xmm1, %ecx +; CHECK-NEXT: shll $16, %ecx +; CHECK-NEXT: vmovd %ecx, %xmm0 +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: vmovd %eax, %xmm1 +; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm1 +; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1 +; CHECK-NEXT: vmovw %xmm1, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: vmovd %eax, %xmm1 +; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm2 +; CHECK-NEXT: vcvtneps2bf16 %xmm2, %xmm2 +; CHECK-NEXT: vmovw %xmm2, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: vmovd %eax, %xmm2 +; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm1 +; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1 +; CHECK-NEXT: vmovw %xmm1, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: vmovd %eax, %xmm1 +; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1 +; CHECK-NEXT: vmovw %xmm1, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: vmovd %eax, %xmm1 +; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 +; CHECK-NEXT: vmovw %xmm0, %eax +; CHECK-NEXT: vmovw %eax, %xmm0 +; CHECK-NEXT: retq + +entry: + + %sub1 = fsub bfloat %a, %b + %add = fadd bfloat %sub1, %b + %sub2 = fsub bfloat %add, %sub1 + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %sub2) + %result = fsub bfloat %canonicalized, %b + ret bfloat %result +} + +define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { +start: + ; CHECK-LABEL: canonicalize_fp64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 + ; CHECK-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 + ; CHECK-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} + ; CHECK-NEXT: vmovapd %xmm2, %xmm0 + ; CHECK-NEXT: retq + + %c = fcmp olt double %a, %b + %d = fcmp uno double %a, 0.000000e+00 + %or.cond.i.i = or i1 %d, %c + %e = 
select i1 %or.cond.i.i, double %b, double %a + %f = tail call double @llvm.canonicalize.f64(double %e) #2 + ret double %f +} + +define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { +start: + ; CHECK-LABEL: canonicalize_fp32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmaxss %xmm0, %xmm1, %xmm2 + ; CHECK-NEXT: vcmpunordss %xmm0, %xmm0, %k1 + ; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} + ; CHECK-NEXT: vmovaps %xmm2, %xmm0 + ; CHECK-NEXT: retq + + %cc = fcmp olt float %aa, %bb + %dd = fcmp uno float %aa, 0.000000e+00 + %or.cond.i.i.x = or i1 %dd, %cc + %ee = select i1 %or.cond.i.i.x, float %bb, float %aa + %ff = tail call float @llvm.canonicalize.f32(float %ee) #2 + ret float %ff +} + +define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { + ; CHECK-LAEBL: v_test_canonicalize_var_f32 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovss (%rdi), %xmm0 + ; CHECK-NEXT: vmovss %xmm0, (%rdi) + ; CHECK-NEXT: retq + %val = load float, float addrspace(1)* %out + %canonicalized = call float @llvm.canonicalize.f32(float %val) + store float %canonicalized, float addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { + ; CHECK-LAEBL: v_test_canonicalize_x86_fp80 + ; CHECK: # %bb.0: + ; CHECK-NEXT: fldt (%rdi) + ; CHECK-NEXT: fstpt (%rdi) + ; CHECK-NEXT: retq + %val = load x86_fp80, x86_fp80 addrspace(1)* %out + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val) + store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize__half(half addrspace(1)* %out) { +; CHECK-LABEL: v_test_canonicalize__half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmovsh (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vmovsh %xmm0, (%rdi) +; CHECK-NEXT: retq +entry: + %val = load half, half addrspace(1)* %out + %canonicalized = call half @llvm.canonicalize.f16(half %val) + store half %canonicalized, half addrspace(1)* %out + ret 
void +} + +define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { + ; CHECK-LAEBL: v_test_canonicalize_var_f64 + ; CHECK: # %bb.0: + ; CHECK-NEXT: vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero + ; CHECK-NEXT: vmovsd %xmm0, (%rdi) + ; CHECK-NEXT: retq + %val = load double, double addrspace(1)* %out + %canonicalized = call double @llvm.canonicalize.f64(double %val) + store double %canonicalized, double addrspace(1)* %out + ret void +} + +define void @v_test_canonicalize__bfloat(bfloat addrspace(1)* %out) { +; CHECK-LABEL: v_test_canonicalize__bfloat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: vmovd %eax, %xmm0 +; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 +; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) +; CHECK-NEXT: retq + +entry: + %val = load bfloat, bfloat addrspace(1)* %out + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val) + store bfloat %canonicalized, bfloat addrspace(1)* %out + ret void +} + +declare double @llvm.canonicalize.f64(double) +declare float @llvm.canonicalize.f32(float) +declare bfloat @llvm.canonicalize.bf16(bfloat) +declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) +declare half @llvm.canonicalize.f16(half) \ No newline at end of file From 34d5244817bcd98c50bffea2a551b5b94722d855 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Fri, 6 Sep 2024 09:24:44 +0200 Subject: [PATCH 02/10] Move combine operations to DAG combiner over from legalizer, address comments --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 - llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 46 - llvm/lib/Target/X86/X86ISelLowering.cpp | 120 + .../CodeGen/X86/canonicalize-constants.ll | 593 ++++- .../CodeGen/X86/canonicalize-subnormals.ll | 1929 +++++++++++++++-- llvm/test/CodeGen/X86/canonicalize-vars.ll | 997 ++++++++- 6 files changed, 3270 insertions(+), 465 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 
c1679b1002df5..74e3a898569be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1275,56 +1275,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } } break; - case ISD::FCANONICALIZE: { - const Triple &TT = DAG.getTarget().getTargetTriple(); - if (TT.getArch() == Triple::x86 || TT.getArch() == Triple::x86_64) { - SDValue Operand = Node->getOperand(0); - SDLoc dl(Node); - EVT VT = Operand.getValueType(); - - if (ConstantFPSDNode *CFP = dyn_cast(Operand)) { - const APFloat &C = CFP->getValueAPF(); - if (C.isDenormal()) { - DenormalMode Mode = - DAG.getMachineFunction().getDenormalMode(C.getSemantics()); - assert((Mode != DenormalMode::getPositiveZero()) && - "Positive denormal mode is not valid for X86 target."); - if (Mode == DenormalMode::getPreserveSign()) { - SDValue SDZero = - DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT); - ConstantFPSDNode *ZeroConstFP = cast(SDZero); - SDValue CanonZeroFPLoad = ExpandConstantFP(ZeroConstFP, true); - DAG.ReplaceAllUsesWith(Node, CanonZeroFPLoad.getNode()); - LLVM_DEBUG(dbgs() - << "Legalized Denormal under mode PreserveSign\n"); - return; - } else if (Mode == DenormalMode::getIEEE()) { - DAG.ReplaceAllUsesWith(Node, Operand.getNode()); - LLVM_DEBUG(dbgs() << "Legalized Denormal under mode IEEE\n"); - return; - } - } else if (C.isNaN() && C.isSignaling()) { - APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics()); - SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); - ConstantFPSDNode *QNaNConstFP = cast(QuitNaN); - SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true); - DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode()); - LLVM_DEBUG(dbgs() << "Legalized Signaling NaN to Quiet NaN\n"); - return; - } - } else if (Operand.isUndef()) { - APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); - SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); - ConstantFPSDNode *QNaNConstFP = cast(QuitNaN); - SDValue QNanLoad = 
ExpandConstantFP(QNaNConstFP, true); - DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode()); - LLVM_DEBUG(dbgs() << "Legalized Undef to Quiet NaN\n"); - return; - } - break; - } - break; - } case ISD::FSHL: case ISD::FSHR: case ISD::SRL_PARTS: diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 4bb8c9afd23ed..d0a54ab8993c2 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5271,52 +5271,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } break; } - case ISD::FCANONICALIZE: { - SDValue Operand = Node->getOperand(0); - EVT VT = Node->getValueType(0); - - // Perform canonicalization for constants. Replace the operand by a load - // from constant pool for this constant. At this point subnoraml values like - // denormals, snans have been canonicalized so no need to deal with those - // cases. - if (LoadSDNode *Load = dyn_cast(Operand)) { - const X86TargetLowering *X86Lowering = - static_cast(TLI); - if (const Constant *CV = X86Lowering->getTargetConstantFromLoad(Load)) { - const ConstantFP *CFP = dyn_cast(CV); - if (CFP) { - ReplaceNode(Node, Load); - return; - } - } - } - - // Canonicalize normal non-constant/non-undef FP Nodes. - SDValue MulNode; - SDValue One; - if (VT == MVT::f32 || VT == MVT::f64) { - One = CurDAG->getConstantFP(1.0f, dl, VT); - } else if (VT == MVT::f80) { - APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended()); - One = CurDAG->getConstantFP(Val, dl, VT); - } else if (VT == MVT::f16) { - APFloat Val(APFloat::IEEEhalf(), "1.0"); - One = CurDAG->getConstantFP(Val, dl, VT); - } else if (VT == MVT::bf16) { - APFloat Val(APFloat::BFloat(), "1.0"); - One = CurDAG->getConstantFP(Val, dl, VT); - } else { - // Is it better to assert? when we encounter an unknown FP type,Than to - // just replace with the operand! As this might be our last attempt at - // legalization. 
- ReplaceNode(Node, Operand.getNode()); - return; - } - // TODO : Follow-up with tablegen pattern to generate mul * 1.0. - MulNode = CurDAG->getNode(ISD::FMUL, dl, VT, Operand, One); - ReplaceNode(Node, MulNode.getNode()); - return; - } case ISD::BRIND: case X86ISD::NT_BRIND: { if (Subtarget->isTargetNaCl()) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1a6be4eb5af1e..4fc7c70764f56 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2533,6 +2533,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::STRICT_FMA, ISD::FMINNUM, ISD::FMAXNUM, + ISD::FCANONICALIZE, ISD::SUB, ISD::LOAD, ISD::LRINT, @@ -57976,6 +57977,124 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, return SDValue(); } +SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) { + SDValue Operand = Node->getOperand(0); + SDLoc dl(Node); + EVT VT = Operand.getValueType(); + if (ConstantFPSDNode *CFP = dyn_cast(Operand)) { + const APFloat &C = CFP->getValueAPF(); + if (C.isDenormal()) { + DenormalMode Mode = + DAG.getMachineFunction().getDenormalMode(C.getSemantics()); + assert((Mode != DenormalMode::getPositiveZero()) && + "Positive denormal mode is not valid for X86 target."); + if (Mode == DenormalMode::getPreserveSign()) { + SDValue SDZero = + DAG.getConstantFP((C.isNegative() ? 
-0.0 : 0.0), dl, VT); + return SDZero; + } else if (Mode == DenormalMode::getIEEE()) { + return Operand; + } + } else if (C.isNaN() && C.isSignaling()) { + APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics()); + SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); + return QuitNaN; + } + } + return Operand; +} + +SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) { + assert(N!=nullptr && "Trying to find last chain for a NULL Node"); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + if (Op.getValueType() == MVT::Other && Op.getNode()->isStrictFPOpcode()) + return Op; + } + return DAG.getEntryNode(); +} + +bool isNonCanonicalizingOperation(SDNode *N) { + assert(N!=nullptr && "Trying to check canonical opcode for a NULL Node"); + unsigned Opc = N->getOpcode(); + switch (Opc) { + // Ensure these are the exasustive set of non canonicalizing opcodes. Add more + // if not. + case X86::RET: + case ISD::STORE: + case ISD::SETCC: + case X86ISD::FCMP: + return true; + default: + return false; + } +} + +bool isUsedByNonCanonicalizingOp(SDNode *N) { + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; + ++UI) { + SDNode *User = *UI; + if (isNonCanonicalizingOperation(User)) + return true; + } + return false; +} + +SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) { + SDValue Operand = Node->getOperand(0); + EVT VT = Operand.getValueType(); + SDLoc dl(Node); + + if (auto *CFP = dyn_cast(Operand)) + return combineConstantCanonicalize(Node, DAG); + + if (Operand.isUndef()) { + APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); + SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); + return QuitNaN; + } + + // Canonicalize scalar variable FP Nodes. 
+ SDValue MulNode; + SDValue One; + if (VT == MVT::f32 || VT == MVT::f64) { + One = DAG.getConstantFP(1.0f, dl, VT); + } else if (VT == MVT::f80) { + APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended()); + One = DAG.getConstantFP(Val, dl, VT); + } else if (VT == MVT::f16) { + APFloat Val(APFloat::IEEEhalf(), "1.0"); + One = DAG.getConstantFP(Val, dl, VT); + } else if (VT == MVT::bf16) { + APFloat Val(APFloat::BFloat(), "1.0"); + One = DAG.getConstantFP(Val, dl, VT); + } else { + // Is it better to assert? when we encounter an unknown FP type,Than to + // just replace with the operand! As this might be our last attempt at + // legalization. + return Operand; + } + + // Store, return, and compare are non-canonicalizing operations. If a + // non-canonicalizing operation uses the rest then mul * 1.0 must be generated + // int those cases. + // TODO: For now Preventing bf16 from generating strict_fmul as it + // leads to a crash SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0, + // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft + // promote this operator's result! + if (isUsedByNonCanonicalizingOp(Node) && VT != MVT::bf16) { + SDValue Chain = findLastStrictOpChain(Node, DAG); + // TODO : Follow-up with tablegen pattern to generate mul * 1.0. + SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, + {Chain, One, Operand}); + + return StrictFmul; + } + + return Operand; + // TODO : Hanlde vectors. 
+} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -58015,6 +58134,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); + case ISD::FCANONICALIZE: return combineCanonicalize(N,DAG); case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget); case ISD::AVGCEILS: case ISD::AVGCEILU: diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll index b71c74bcd4472..b1a9733806d40 100644 --- a/llvm/test/CodeGen/X86/canonicalize-constants.ll +++ b/llvm/test/CodeGen/X86/canonicalize-constants.ll @@ -1,62 +1,185 @@ -; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 +; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE +; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2 +; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX +; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2 +; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW define float @canon_fp32() { - ; CHECK-LABEL: .LCPI0_0: - ; CHECK: .long 0x40400000 # float 3 - ; CHECK-LABEL: canon_fp32 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] - ; CHECK-NEXT: retq +; SSE-LABEL: canon_fp32: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp32: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = 
[3.0E+0,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp32: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: retq +; +; AVX2-LABEL: canon_fp32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float 3.0) ret float %canonicalized } define half @canon_fp16() { - ; CHECK-LABEL: .LCPI1_0: - ; CHECK: .short 0x4200 # half 3 - ; CHECK-LABEL: canon_fp16 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsh .LCPI1_0(%rip), %xmm0 - ; CHECK-NEXT: retq +; SSE-LABEL: canon_fp16: +; SSE: # %bb.0: +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp16: +; SSE2: # %bb.0: +; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp16: +; AVX: # %bb.0: +; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX2-LABEL: canon_fp16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: retq %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0 ret half %canonicalized } define double @canon_fp64() { - ; CHECK-LABEL: .LCPI2_0: - ; CHECK: .quad 0x4008000000000000 # double 3 - ; CHECK-LABEL: canon_fp64 - ; CHECK: # %bb.0: - ; CHECK-NEXT: 
vmovsd .LCPI2_0(%rip), %xmm0 - ; CHECK-NEXT: retq +; SSE-LABEL: canon_fp64: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp64: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp64: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX-NEXT: retq +; +; AVX2-LABEL: canon_fp64: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp64: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double 3.0) ret double %canonicalized } define x86_fp80 @canon_fp80() { - ; CHECK-LABEL: .LCPI3_0: - ; CHECK: .long 0x42b40000 # float 90 - ; CHECK-LABEL: canon_fp80 - ; CHECK: # %bb.0: - ; CHECK-NEXT: flds .LCPI3_0(%rip) - ; CHECK-NEXT: retq - +; SSE-LABEL: canon_fp80: +; SSE: # %bb.0: +; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp80: +; AVX: # %bb.0: +; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX-NEXT: retq +; +; AVX2-LABEL: canon_fp80: +; AVX2: # %bb.0: +; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp80: +; AVX512F: # %bb.0: +; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp80: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0 ret x86_fp80 %canonicalized } define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) 
{ +; SSE-LABEL: complex_canonicalize_x86_fp80: +; SSE: # %bb.0: # %entry +; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE-NEXT: retq +; +; SSE2-LABEL: complex_canonicalize_x86_fp80: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE2-NEXT: retq +; +; AVX-LABEL: complex_canonicalize_x86_fp80: +; AVX: # %bb.0: # %entry +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX-NEXT: retq +; +; AVX2-LABEL: complex_canonicalize_x86_fp80: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: complex_canonicalize_x86_fp80: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: complex_canonicalize_x86_fp80: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512BW-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512BW-NEXT: retq entry: - ; CHECK-LABEL: .LCPI4_0: - ; CHECK: .long 0x42b40000 # float 90 - ; CHECK-LABEL: complex_canonicalize_x86_fp80 - ; CHECK: # %bb.0: - ; CHECK-NEXT: fldt 24(%rsp) - ; CHECK-NEXT: flds .LCPI4_0(%rip) - ; CHECK-NEXT: fsubp %st, %st(1) - ; CHECK-NEXT: retq - %mul1 = fsub x86_fp80 %a, %b %add = fadd x86_fp80 %mul1, %b %mul2 = fsub x86_fp80 %add, %mul1 @@ -66,14 +189,36 @@ entry: } define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 { +; SSE-LABEL: complex_canonicalize_fp64: +; SSE: # %bb.0: # %start +; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; SSE-NEXT: retq +; +; SSE2-LABEL: complex_canonicalize_fp64: +; SSE2: # %bb.0: # %start +; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; SSE2-NEXT: retq +; +; AVX-LABEL: complex_canonicalize_fp64: +; AVX: # %bb.0: # %start +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; 
AVX-NEXT: retq +; +; AVX2-LABEL: complex_canonicalize_fp64: +; AVX2: # %bb.0: # %start +; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: complex_canonicalize_fp64: +; AVX512F: # %bb.0: # %start +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: complex_canonicalize_fp64: +; AVX512BW: # %bb.0: # %start +; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] +; AVX512BW-NEXT: retq start: - ; CHECK-LABEL: .LCPI5_0: - ; CHECK: .quad 0x4008000000000000 # double 3 - ; CHECK-LABEL: complex_canonicalize_fp64 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0 - ; CHECK-NEXT: retq - %c = fcmp olt double %a, %b %d = fcmp uno double %a, 0.000000e+00 %or.cond.i.i = or i1 %d, %c @@ -83,24 +228,70 @@ start: } define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 { - ; CHECK-LAEBL: test_fold_canonicalize_p0_f32 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 - ; CHECK-NEXT: vmovss %xmm0, (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: test_fold_canonicalize_p0_f32: +; SSE: # %bb.0: +; SSE-NEXT: movl $0, (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: test_fold_canonicalize_p0_f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movl $0, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fold_canonicalize_p0_f32: +; AVX: # %bb.0: +; AVX-NEXT: movl $0, (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: test_fold_canonicalize_p0_f32: +; AVX2: # %bb.0: +; AVX2-NEXT: movl $0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: test_fold_canonicalize_p0_f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movl $0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: test_fold_canonicalize_p0_f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movl $0, (%rdi) +; AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float 0.0) store float %canonicalized, float addrspace(1)* %out ret void } define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 { - ; CHECK-LAEBL: .LCPI7_0: - ; 
CHECK: .long 0x80000000 # float -0 - ; CHECK-LAEBL: test_fold_canonicalize_n0_f32 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovss .LCPI7_0(%rip), %xmm0 - ; CHECK-NEXT: vmovss %xmm0, (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: test_fold_canonicalize_n0_f32: +; SSE: # %bb.0: +; SSE-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; SSE-NEXT: retq +; +; SSE2-LABEL: test_fold_canonicalize_n0_f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fold_canonicalize_n0_f32: +; AVX: # %bb.0: +; AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; AVX-NEXT: retq +; +; AVX2-LABEL: test_fold_canonicalize_n0_f32: +; AVX2: # %bb.0: +; AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: test_fold_canonicalize_n0_f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: test_fold_canonicalize_n0_f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float -0.0) store float %canonicalized, float addrspace(1)* %out ret void @@ -108,27 +299,84 @@ define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 { define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { - ; CHECK-LAEBL: .LCPI8_0: - ; CHECK: .long 0x42b40000 # float 90 - ; CHECK-LAEBL: v_test_canonicalize_p90_x86_fp80 - ; CHECK: # %bb.0: - ; CHECK-NEXT: flds .LCPI8_0(%rip) - ; CHECK-NEXT: fstpt (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize_p90_x86_fp80: +; SSE: # %bb.0: +; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE-NEXT: fstpt (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_p90_x86_fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE2-NEXT: fstpt (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_p90_x86_fp80: +; AVX: # 
%bb.0: +; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX-NEXT: fstpt (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_p90_x86_fp80: +; AVX2: # %bb.0: +; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX2-NEXT: fstpt (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_p90_x86_fp80: +; AVX512F: # %bb.0: +; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512F-NEXT: fstpt (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_p90_x86_fp80: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512BW-NEXT: fstpt (%rdi) +; AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out ret void } define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI9_0: - ; CHECK: .short 0x4200 # half 3 - ; CHECK-LABEL: v_test_canonicalize_p3__half: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: vmovsh .LCPI9_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsh %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; SSE-LABEL: v_test_canonicalize_p3__half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: movw %ax, (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_p3__half: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: movw %ax, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_p3__half: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_p3__half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_p3__half: 
+; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_p3__half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX512BW-NEXT: retq entry: %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) store half %canonicalized, half addrspace(1)* %out @@ -136,13 +384,41 @@ entry: } define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 { - ; CHECK-LABEL: .LCPI10_0: - ; CHECK: .quad 0x4008000000000000 # double 3 - ; CHECK-LAEBL: v_test_canonicalize_p3_f64 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI10_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize_p3_f64: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 +; SSE-NEXT: movq %rax, (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_p3_f64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 +; SSE2-NEXT: movq %rax, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_p3_f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 +; AVX-NEXT: movq %rax, (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_p3_f64: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 +; AVX2-NEXT: movq %rax, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_p3_f64: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 +; AVX512F-NEXT: movq %rax, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_p3_f64: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: movabsq $4613937818241073152, %rax # 
imm = 0x4008000000000000 +; AVX512BW-NEXT: movq %rax, (%rdi) +; AVX512BW-NEXT: retq entry: %canonicalized = call double @llvm.canonicalize.f64(double 3.0) store double %canonicalized, double addrspace(1)* %out @@ -150,15 +426,35 @@ entry: } define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI11_0: - ; CHECK: .long 0x40400000 # float 3 - ; CHECK-LABEL: v_test_canonicalize_p3__bfloat: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0 # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] - ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 - ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; SSE-LABEL: v_test_canonicalize_p3__bfloat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movw $16448, (%rdi) # imm = 0x4040 +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_p3__bfloat: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movw $16448, (%rdi) # imm = 0x4040 +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_p3__bfloat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: movw $16448, (%rdi) # imm = 0x4040 +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_p3__bfloat: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: movw $16448, (%rdi) # imm = 0x4040 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_p3__bfloat: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: movw $16448, (%rdi) # imm = 0x4040 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_p3__bfloat: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: movw $16448, (%rdi) # imm = 0x4040 +; AVX512BW-NEXT: retq entry: %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0) store bfloat %canonicalized, bfloat addrspace(1)* %out @@ -166,15 +462,35 @@ entry: } define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI12_0: - ; CHECK: .long 0xc0400000 # float -3 - ; CHECK-LABEL: v_test_canonicalize_n3__bfloat: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: vmovss .LCPI12_0(%rip), %xmm0 # xmm0 = [-3.0E+0,0.0E+0,0.0E+0,0.0E+0] 
- ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 - ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; SSE-LABEL: v_test_canonicalize_n3__bfloat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movw $-16320, (%rdi) # imm = 0xC040 +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_n3__bfloat: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movw $-16320, (%rdi) # imm = 0xC040 +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_n3__bfloat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: movw $-16320, (%rdi) # imm = 0xC040 +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_n3__bfloat: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: movw $-16320, (%rdi) # imm = 0xC040 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_n3__bfloat: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: movw $-16320, (%rdi) # imm = 0xC040 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_n3__bfloat: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: movw $-16320, (%rdi) # imm = 0xC040 +; AVX512BW-NEXT: retq entry: %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0) store bfloat %canonicalized, bfloat addrspace(1)* %out @@ -182,29 +498,86 @@ entry: } define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { - ; CHECK-LAEBL: .LCPI13_0: - ; CHECK: .long 0xc2b40000 # float -90 - ; CHECK-LAEBL: v_test_canonicalize_n90_x86_fp80 - ; CHECK: # %bb.0: - ; CHECK-NEXT: flds .LCPI13_0(%rip) - ; CHECK-NEXT: fstpt (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize_n90_x86_fp80: +; SSE: # %bb.0: +; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE-NEXT: fstpt (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_n90_x86_fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; SSE2-NEXT: fstpt (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_n90_x86_fp80: +; AVX: # %bb.0: +; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX-NEXT: fstpt (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: 
v_test_canonicalize_n90_x86_fp80: +; AVX2: # %bb.0: +; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX2-NEXT: fstpt (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_n90_x86_fp80: +; AVX512F: # %bb.0: +; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512F-NEXT: fstpt (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_n90_x86_fp80: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; AVX512BW-NEXT: fstpt (%rdi) +; AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out ret void } define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI14_0: - ; CHECK: .short 0xc200 # half -3 - ; CHECK-LABEL: v_test_canonicalize_n3__half: - ; CHECK: # %bb.0: # %entry - ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsh %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; SSE-LABEL: v_test_canonicalize_n3__half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: movw %ax, (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_n3__half: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: movw %ax, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_n3__half: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_n3__half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_n3__half: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; 
AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_n3__half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; AVX512BW-NEXT: retq entry: %canonicalized = call half @llvm.canonicalize.f16(half 0xHC200) store half %canonicalized, half addrspace(1)* %out ret void -} \ No newline at end of file +} diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll index 8e7e04c2a67dc..034da96271eb8 100644 --- a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll +++ b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll @@ -1,30 +1,269 @@ -; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck %s -; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL %s -; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 +; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-SSE2 %s +; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-SSE2 %s +; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-SSE2 %s +; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX %s +; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX %s +; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX %s +; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | 
FileCheck -check-prefix=PRE-SIGN-AVX2 %s +; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX2 %s +; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX2 %s +; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512F %s +; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512F %s +; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512F %s +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512BW %s +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512BW %s +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512BW %s -define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI0_0: - ; CHECK: .long 0x80000000 # float -0 - ; CHECK-LABEL: canonicalize_denormal1_f32_pre_sign: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovss .LCPI0_0(%rip), %xmm0 - ; CHECK-NEXT: vmovss %xmm0, (%rdi) - ; CHECK-NEXT: retq +define double @test_bad_subnormal() { +; PRE-SIGN-SSE2-LABEL: test_bad_subnormal: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: test_bad_subnormal: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: test_bad_subnormal: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: test_bad_subnormal: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: vmovsd 
{{.*#+}} xmm0 = [NaN,0.0E+0] +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: test_bad_subnormal: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: test_bad_subnormal: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: test_bad_subnormal: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: test_bad_subnormal: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: test_bad_subnormal: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: test_bad_subnormal: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: test_bad_subnormal: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: test_bad_subnormal: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: test_bad_subnormal: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: test_bad_subnormal: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: test_bad_subnormal: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = 
[NaN,0.0E+0] +; DYN-DENORMAL-AVX512BW-NEXT: retq + %canon = call double @llvm.canonicalize(double 0x7ff8000000000001) ; Nan + ret double %canon +} +define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) { +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_pre_sign: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX2-NEXT: 
retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) store float %canonicalized, float addrspace(1)* %out ret void } define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI1_0: - ; CHECK: .quad 0x8000000000000000 # double -0 - ; CHECK-LABEL: canonicalize_denormal1_f64_pre_sign: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI1_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) +; 
PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_pre_sign: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movabsq 
$-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) store double %canonicalized, double addrspace(1)* %out ret void @@ -32,141 +271,875 @@ 
define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) { define void @canonicalize_qnan_f64(double addrspace(1)* %out) { - ;cCHECK-LABEL: .LCPI2_0: - ;cCHECK: .quad 0x7ff8000000000000 # double NaN - ; CHECK-LABEL: canonicalize_qnan_f64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI2_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_f64: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_qnan_f64: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_f64: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_f64: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_f64: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 
0x7FF8000000000000 +; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_f64: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_f64: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64: +; DYN-DENORMAL-AVX512BW: # %bb.0: 
+; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) { - ;cCHECK-LABEL: .LCPI3_0: - ;cCHECK: .quad 0xffffffffffffffff # double NaN - ; CHECK-LABEL: canonicalize_qnan_value_neg1_f64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI3_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg1_f64: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movq $-1, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movq $-1, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movq $-1, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg1_f64: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movq $-1, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movq $-1, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movq $-1, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg1_f64: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movq $-1, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movq $-1, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; 
DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movq $-1, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movq $-1, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movq $-1, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double)) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI4_0: - ; CHECK: .quad 0xfffffffffffffffe # double NaN - ; CHECK-LABEL: canonicalize_qnan_value_neg2_f64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI4_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg2_f64: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movq $-2, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64: +; IEEE-DENORMAL-SSE2: # %bb.0: +; 
IEEE-DENORMAL-SSE2-NEXT: movq $-2, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movq $-2, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg2_f64: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movq $-2, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movq $-2, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movq $-2, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg2_f64: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movq $-2, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movq $-2, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movq $-2, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movq $-2, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movq $-2, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: 
canonicalize_qnan_value_neg2_f64: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double)) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI5_0: - ; CHECK: .quad 0x7ff8000000000000 # double NaN - ; CHECK-LABEL: canonicalize_snan0_value_f64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI5_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_snan0_value_f64: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_snan0_value_f64: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; 
IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_snan0_value_f64: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_snan0_value_f64: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_snan0_value_f64: +; PRE-SIGN-AVX512BW: # %bb.0: +; 
PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double)) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_undef(double addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI6_0: - ; CHECK: .quad 0x7ff8000000000000 # double NaN - ; CHECK-LABEL: canonicalize_undef: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd .LCPI6_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_undef: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_undef: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_undef: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_undef: +; PRE-SIGN-AVX: # %bb.0: +; 
PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_undef: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_undef: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_undef: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_undef: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_undef: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_undef: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_undef: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_undef: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: 
movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_undef: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_undef: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_undef: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double undef) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) { - ; IEEE-DENORMAL-LABEL: .LCPI7_0: - ; IEEE-DENORMAL: .long 0x807fffff # float -1.17549421E-38 - ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f32_ieee: - ; IEEE-DENORMAL: # %bb.0: - ; IEEE-DENORMAL-NEXT: vmovss .LCPI7_0(%rip), %xmm0 - ; IEEE-DENORMAL-NEXT: vmovss %xmm0, (%rdi) - ; IEEE-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_ieee: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF 
+; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_ieee: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_ieee: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_ieee: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; 
PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) store float %canonicalized, float addrspace(1)* %out ret void } define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) { - ; IEEE-DENORMAL-LABEL: .LCPI8_0: - ; IEEE-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308 - ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f64_ieee: - ; IEEE-DENORMAL: # %bb.0: - ; IEEE-DENORMAL-NEXT: vmovsd .LCPI8_0(%rip), %xmm0 - ; IEEE-DENORMAL-NEXT: vmovsd %xmm0, (%rdi) - ; IEEE-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_ieee: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_ieee: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) 
+; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_ieee: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_ieee: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movabsq 
$-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) { - ; DYN-DENORMAL-LABEL: .LCPI9_0: - ; DYN-DENORMAL: .long 0x807fffff # float -1.17549421E-38 - ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f32_dynamic: - ; DYN-DENORMAL: # %bb.0: - ; DYN-DENORMAL-NEXT: vmovss .LCPI9_0(%rip), %xmm0 - ; DYN-DENORMAL-NEXT: vmovss %xmm0, (%rdi) - ; DYN-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_dynamic: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic: +; DYN-DENORMAL-SSE2: 
# %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_dynamic: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_dynamic: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic: +; 
PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) store float %canonicalized, float addrspace(1)* %out ret void } define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) { - ; DYN-DENORMAL-LABEL: .LCPI10_0: - ; DYN-DENORMAL: .quad 0x800fffffffffffff # double -2.2250738585072009E-308 - ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f64_dynamic: - ; DYN-DENORMAL: # %bb.0: - ; DYN-DENORMAL-NEXT: vmovsd .LCPI10_0(%rip), %xmm0 - ; DYN-DENORMAL-NEXT: vmovsd %xmm0, (%rdi) - ; DYN-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_dynamic: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_dynamic: +; PRE-SIGN-AVX: # %bb.0: +; 
PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_dynamic: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq 
+; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF +; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) store double %canonicalized, double addrspace(1)* %out ret void } define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI11_0: - ; CHECK: .long 0x80000000 # float -0 - ; CHECK-LABEL: canonicalize_denormal1_bfloat_pre_sign: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovss .LCPI11_0(%rip), %xmm0 - ; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 - ; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) 
# imm = 0x8000 +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; 
DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) store bfloat %canonicalized, bfloat addrspace(1)* %out ret void @@ -174,15 +1147,80 @@ define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) { define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) { - ; IEEE-DENORMAL-LABEL: .LCPI12_0: - ; IEEE-DENORMAL: .long 0x80000000 # float -0 - ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_bfloat_ieee: - ; IEEE-DENORMAL: # %bb.0: - ; IEEE-DENORMAL-NEXT: vmovss .LCPI12_0(%rip), %xmm0 - ; IEEE-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0 - ; IEEE-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi) - ; IEEE-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_ieee: +; PRE-SIGN-AVX: # 
%bb.0: +; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # 
imm = 0x8000 +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) store bfloat %canonicalized, bfloat addrspace(1)* %out ret void @@ -190,29 +1228,178 @@ define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) { define void @canonicalize_denormal1_bfloat_dynamic(bfloat addrspace(1)* %out) { - ; DYN-DENORMAL-LABEL: .LCPI13_0: - ; DYN-DENORMAL: .long 0x80000000 # float -0 - ; DYN-DENORMAL-LABEL: canonicalize_denormal1_bfloat_dynamic: - ; DYN-DENORMAL: # %bb.0: - ; DYN-DENORMAL-NEXT: vmovss .LCPI13_0(%rip), %xmm0 - ; DYN-DENORMAL-NEXT: vcvtneps2bf16 %xmm0, %xmm0 - ; DYN-DENORMAL-NEXT: vpextrw $0, %xmm0, (%rdi) - ; DYN-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic: +; DYN-DENORMAL-AVX: # %bb.0: +; 
DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) store 
bfloat %canonicalized, bfloat addrspace(1)* %out ret void } define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) { - ; CHECK-LABEL: .LCPI14_0: - ; CHECK: .short 0x8000 # half -0 - ; CHECK-LABEL: canonicalize_denormal1_half_pre_sign: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsh .LCPI14_0(%rip), %xmm0 - ; CHECK-NEXT: vmovsh %xmm0, (%rdi) - ; CHECK-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_pre_sign: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax +; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax +; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax +; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_pre_sign: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX-NEXT: vpextrw $0, 
%xmm0, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_pre_sign: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: 
canonicalize_denormal1_half_pre_sign: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) store half %canonicalized, half addrspace(1)* %out ret void @@ -220,68 +1407,482 @@ define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) { define void @canonicalize_denormal1_half_ieee(half addrspace(1)* %out) { - ; IEEE-DENORMAL-LABEL: .LCPI15_0: - ; IEEE-DENORMAL: .short 0x8000 # half -0 - ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_half_ieee: - ; IEEE-DENORMAL: # %bb.0: - ; IEEE-DENORMAL-NEXT: vmovsh .LCPI15_0(%rip), %xmm0 - ; IEEE-DENORMAL-NEXT: vmovsh %xmm0, (%rdi) - ; IEEE-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_ieee: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax +; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax +; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax +; DYN-DENORMAL-SSE2-NEXT: movw %ax, 
(%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_ieee: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_ieee: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_ieee: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_ieee: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) store half %canonicalized, half addrspace(1)* %out ret void } define void @canonicalize_denormal1_half_dynamic(half addrspace(1)* %out) { - ; DYN-DENORMAL-LABEL: .LCPI16_0: - ; DYN-DENORMAL: .short 0x8000 # half -0 - ; DYN-DENORMAL-LABEL: canonicalize_denormal1_half_dynamic: - ; DYN-DENORMAL: # %bb.0: - ; DYN-DENORMAL-NEXT: vmovsh .LCPI16_0(%rip), %xmm0 - ; DYN-DENORMAL-NEXT: vmovsh %xmm0, (%rdi) - ; DYN-DENORMAL-NEXT: retq - +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_dynamic: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax +; 
PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax +; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax +; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_dynamic: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_dynamic: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic: +; IEEE-DENORMAL-AVX2: # %bb.0: +; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: 
canonicalize_denormal1_half_dynamic: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_dynamic: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to 
half)) store half %canonicalized, half addrspace(1)* %out ret void } define void @canonicalize_denormal1_x86_fp80_pre_sign(x86_fp80 addrspace(1)* %out) { - ; CHECK-LAEBL: .LCPI17_0: - ; CHECK: .long 0x00000000 # float 0 - ; CHECK-LAEBL: canonicalize_denormal1_x86_fp80_pre_sign - ; CHECK: # %bb.0: - ; CHECK-NEXT: flds .LCPI17_0(%rip) - ; CHECK-NEXT: fstpt (%rdi) - ; CHECK-NEXT: retq +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: fldz +; PRE-SIGN-SSE2-NEXT: fstpt (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: fldz +; PRE-SIGN-AVX-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: fldz +; PRE-SIGN-AVX2-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; IEEE-DENORMAL-AVX2: # 
%bb.0: +; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: fldz +; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: fldz +; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out ret void } define 
void @canonicalize_denormal1_x86_fp80_dynamic(x86_fp80 addrspace(1)* %out) { - ; DYN-DENORMAL-LAEBL: .LCPI17_0: - ; DYN-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951 - ; DYN-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_dynamic - ; DYN-DENORMAL: # %bb.0: - ; DYN-DENORMAL-NEXT: fldt .LCPI17_0(%rip) - ; DYN-DENORMAL-NEXT: fstpt (%rdi) - ; DYN-DENORMAL-NEXT: retq +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: fldz +; PRE-SIGN-SSE2-NEXT: fstpt (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: fldz +; PRE-SIGN-AVX-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: fldz +; PRE-SIGN-AVX2-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; IEEE-DENORMAL-AVX2: # 
%bb.0: +; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: fldz +; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: fldz +; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out ret void } define void 
@canonicalize_denormal1_x86_fp80_ieee(x86_fp80 addrspace(1)* %out) { - ; IEEE-DENORMAL-LAEBL: .LCPI17_0: - ; IEEE-DENORMAL: .quad 0x0000000000000001 # x86_fp80 3.64519953188247460253E-4951 - ; IEEE-DENORMAL-LAEBL: canonicalize_denormal1_x86_fp80_ieee - ; IEEE-DENORMAL: # %bb.0: - ; IEEE-DENORMAL-NEXT: fldt .LCPI17_0(%rip) - ; IEEE-DENORMAL-NEXT: fstpt (%rdi) - ; IEEE-DENORMAL-NEXT: retq +; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; PRE-SIGN-SSE2: # %bb.0: +; PRE-SIGN-SSE2-NEXT: fldz +; PRE-SIGN-SSE2-NEXT: fstpt (%rdi) +; PRE-SIGN-SSE2-NEXT: retq +; +; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; IEEE-DENORMAL-SSE2: # %bb.0: +; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-SSE2-NEXT: retq +; +; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; DYN-DENORMAL-SSE2: # %bb.0: +; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi) +; DYN-DENORMAL-SSE2-NEXT: retq +; +; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; PRE-SIGN-AVX: # %bb.0: +; PRE-SIGN-AVX-NEXT: fldz +; PRE-SIGN-AVX-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX-NEXT: retq +; +; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; IEEE-DENORMAL-AVX: # %bb.0: +; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX-NEXT: retq +; +; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; DYN-DENORMAL-AVX: # %bb.0: +; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX-NEXT: retq +; +; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; PRE-SIGN-AVX2: # %bb.0: +; PRE-SIGN-AVX2-NEXT: fldz +; PRE-SIGN-AVX2-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX2-NEXT: retq +; +; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; IEEE-DENORMAL-AVX2: # %bb.0: +; 
IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX2-NEXT: retq +; +; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; DYN-DENORMAL-AVX2: # %bb.0: +; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX2-NEXT: retq +; +; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; PRE-SIGN-AVX512F: # %bb.0: +; PRE-SIGN-AVX512F-NEXT: fldz +; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX512F-NEXT: retq +; +; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; IEEE-DENORMAL-AVX512F: # %bb.0: +; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX512F-NEXT: retq +; +; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; DYN-DENORMAL-AVX512F: # %bb.0: +; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX512F-NEXT: retq +; +; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; PRE-SIGN-AVX512BW: # %bb.0: +; PRE-SIGN-AVX512BW-NEXT: fldz +; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi) +; PRE-SIGN-AVX512BW-NEXT: retq +; +; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; IEEE-DENORMAL-AVX512BW: # %bb.0: +; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) +; IEEE-DENORMAL-AVX512BW-NEXT: retq +; +; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee: +; DYN-DENORMAL-AVX512BW: # %bb.0: +; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) +; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) +; DYN-DENORMAL-AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out ret void -} \ No newline at end of file +} diff --git 
a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll index c1b5dd0dddcd2..0075386c02361 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -1,14 +1,266 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 -; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s +; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE +; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2 +; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX +; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2 +; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW + + +define float @canon_fp32_varargsf32(float %a) { +; SSE-LABEL: canon_fp32_varargsf32: +; SSE: # %bb.0: +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp32_varargsf32: +; SSE2: # %bb.0: +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp32_varargsf32: +; AVX: # %bb.0: +; AVX-NEXT: retq +; +; AVX2-LABEL: canon_fp32_varargsf32: +; AVX2: # %bb.0: +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp32_varargsf32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp32_varargsf32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: retq + %canonicalized = call float @llvm.canonicalize.f32(float %a) + ret float %canonicalized +} + +define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { +; SSE-LABEL: canon_fp32_varargsf80: +; SSE: # %bb.0: +; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp32_varargsf80: +; SSE2: # %bb.0: +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp32_varargsf80: +; AVX: # %bb.0: +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: retq +; +; AVX2-LABEL: 
canon_fp32_varargsf80: +; AVX2: # %bb.0: +; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp32_varargsf80: +; AVX512F: # %bb.0: +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp32_varargsf80: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512BW-NEXT: retq + %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a) + ret x86_fp80 %canonicalized +} + +define bfloat @canon_fp32_varargsbf16(bfloat %a) { +; SSE-LABEL: canon_fp32_varargsbf16: +; SSE: # %bb.0: +; SSE-NEXT: retq +; +; SSE2-LABEL: canon_fp32_varargsbf16: +; SSE2: # %bb.0: +; SSE2-NEXT: retq +; +; AVX-LABEL: canon_fp32_varargsbf16: +; AVX: # %bb.0: +; AVX-NEXT: retq +; +; AVX2-LABEL: canon_fp32_varargsbf16: +; AVX2: # %bb.0: +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp32_varargsbf16: +; AVX512F: # %bb.0: +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canon_fp32_varargsbf16: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: retq + %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %a) + ret bfloat %canonicalized +} define half @complex_canonicalize_fmul_half(half %a, half %b) { -; CHECK-LABEL: complex_canonicalize_fmul_half: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: retq +; SSE-LABEL: complex_canonicalize_fmul_half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movss %xmm1, (%rsp) # 4-byte Spill +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movss (%rsp), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; 
SSE-NEXT: subss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; SSE2-LABEL: complex_canonicalize_fmul_half: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pushq %rax +; SSE2-NEXT: .cfi_def_cfa_offset 16 +; SSE2-NEXT: movss %xmm1, (%rsp) # 4-byte Spill +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movss (%rsp), %xmm0 # 4-byte Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: subss %xmm0, %xmm1 +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: popq %rax +; SSE2-NEXT: .cfi_def_cfa_offset 8 +; SSE2-NEXT: retq +; +; AVX-LABEL: complex_canonicalize_fmul_half: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; 
AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload +; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; AVX-NEXT: # xmm1 = mem[0],zero,zero,zero +; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +; +; AVX2-LABEL: complex_canonicalize_fmul_half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: pushq %rax +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload +; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; AVX2-NEXT: # xmm1 = mem[0],zero,zero,zero +; AVX2-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq 
__truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: popq %rax +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: complex_canonicalize_fmul_half: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpextrw $0, %xmm1, %eax +; AVX512F-NEXT: vpextrw $0, %xmm0, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm2 +; AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512F-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: complex_canonicalize_fmul_half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax +; AVX512BW-NEXT: vpextrw $0, %xmm0, %ecx +; AVX512BW-NEXT: vmovd %ecx, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm2 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512BW-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, 
%xmm0 +; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVX512BW-NEXT: retq entry: %mul1 = fsub half %a, %b @@ -20,17 +272,72 @@ entry: } define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { +; SSE-LABEL: complex_canonicalize_fmul_x86_fp80: +; SSE: # %bb.0: # %entry +; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: fsub %st(1), %st +; SSE-NEXT: fld %st(0) +; SSE-NEXT: fadd %st(2), %st +; SSE-NEXT: fsubp %st, %st(1) +; SSE-NEXT: fsubp %st, %st(1) +; SSE-NEXT: retq +; +; SSE2-LABEL: complex_canonicalize_fmul_x86_fp80: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fsub %st(1), %st +; SSE2-NEXT: fld %st(0) +; SSE2-NEXT: fadd %st(2), %st +; SSE2-NEXT: fsubp %st, %st(1) +; SSE2-NEXT: fsubp %st, %st(1) +; SSE2-NEXT: retq +; +; AVX-LABEL: complex_canonicalize_fmul_x86_fp80: +; AVX: # %bb.0: # %entry +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fsub %st(1), %st +; AVX-NEXT: fld %st(0) +; AVX-NEXT: fadd %st(2), %st +; AVX-NEXT: fsubp %st, %st(1) +; AVX-NEXT: fsubp %st, %st(1) +; AVX-NEXT: retq +; +; AVX2-LABEL: complex_canonicalize_fmul_x86_fp80: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX2-NEXT: fsub %st(1), %st +; AVX2-NEXT: fld %st(0) +; AVX2-NEXT: fadd %st(2), %st +; AVX2-NEXT: fsubp %st, %st(1) +; AVX2-NEXT: fsubp %st, %st(1) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: complex_canonicalize_fmul_x86_fp80: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fsub %st(1), %st +; AVX512F-NEXT: fld %st(0) +; AVX512F-NEXT: fadd %st(2), %st +; AVX512F-NEXT: fsubp %st, %st(1) +; AVX512F-NEXT: fsubp %st, %st(1) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: 
complex_canonicalize_fmul_x86_fp80: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512BW-NEXT: fsub %st(1), %st +; AVX512BW-NEXT: fld %st(0) +; AVX512BW-NEXT: fadd %st(2), %st +; AVX512BW-NEXT: fsubp %st, %st(1) +; AVX512BW-NEXT: fsubp %st, %st(1) +; AVX512BW-NEXT: retq entry: - ; CHECK-LABEL: complex_canonicalize_fmul_x86_fp80 - ; CHECK: # %bb.0: - ; CHECK-NEXT: fldt 24(%rsp) - ; CHECK-NEXT: fldt 8(%rsp) - ; CHECK-NEXT: fsub %st(1), %st - ; CHECK-NEXT: fld %st(0) - ; CHECK-NEXT: fadd %st(2), %st - ; CHECK-NEXT: fsubp %st, %st(1) - ; CHECK-NEXT: fsubp %st, %st(1) - ; CHECK-NEXT: retq %mul1 = fsub x86_fp80 %a, %b %add = fadd x86_fp80 %mul1, %b @@ -41,39 +348,203 @@ entry: } define bfloat @complex_canonicalize_fmul_bfloat(bfloat %a, bfloat %b) { -; CHECK-LABEL: complex_canonicalize_fmul_bfloat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmovw %xmm0, %eax -; CHECK-NEXT: vmovw %xmm1, %ecx -; CHECK-NEXT: shll $16, %ecx -; CHECK-NEXT: vmovd %ecx, %xmm0 -; CHECK-NEXT: shll $16, %eax -; CHECK-NEXT: vmovd %eax, %xmm1 -; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1 -; CHECK-NEXT: vmovw %xmm1, %eax -; CHECK-NEXT: shll $16, %eax -; CHECK-NEXT: vmovd %eax, %xmm1 -; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm2 -; CHECK-NEXT: vcvtneps2bf16 %xmm2, %xmm2 -; CHECK-NEXT: vmovw %xmm2, %eax -; CHECK-NEXT: shll $16, %eax -; CHECK-NEXT: vmovd %eax, %xmm2 -; CHECK-NEXT: vsubss %xmm1, %xmm2, %xmm1 -; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1 -; CHECK-NEXT: vmovw %xmm1, %eax -; CHECK-NEXT: shll $16, %eax -; CHECK-NEXT: vmovd %eax, %xmm1 -; CHECK-NEXT: vcvtneps2bf16 %xmm1, %xmm1 -; CHECK-NEXT: vmovw %xmm1, %eax -; CHECK-NEXT: shll $16, %eax -; CHECK-NEXT: vmovd %eax, %xmm1 -; CHECK-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 -; CHECK-NEXT: vmovw %xmm0, %eax -; CHECK-NEXT: vmovw %eax, %xmm0 -; CHECK-NEXT: retq - +; SSE-LABEL: complex_canonicalize_fmul_bfloat: +; SSE: 
# %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: pextrw $0, %xmm1, %ecx +; SSE-NEXT: shll $16, %ecx +; SSE-NEXT: movd %ecx, %xmm1 +; SSE-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: subss %xmm1, %xmm0 +; SSE-NEXT: callq __truncsfbf2@PLT +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfbf2@PLT +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfbf2@PLT +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: shll $16, %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfbf2@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; SSE2-LABEL: complex_canonicalize_fmul_bfloat: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pushq %rax +; SSE2-NEXT: .cfi_def_cfa_offset 16 +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: pextrw $0, %xmm1, %ecx +; SSE2-NEXT: shll $16, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: subss %xmm1, %xmm0 +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte 
Folded Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: popq %rax +; SSE2-NEXT: .cfi_def_cfa_offset 8 +; SSE2-NEXT: retq +; +; AVX-LABEL: complex_canonicalize_fmul_bfloat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vpextrw $0, %xmm0, %eax +; AVX-NEXT: vpextrw $0, %xmm1, %ecx +; AVX-NEXT: shll $16, %ecx +; AVX-NEXT: vmovd %ecx, %xmm1 +; AVX-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: vpextrw $0, %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: vpextrw $0, %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: vpextrw $0, %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +; +; AVX2-LABEL: complex_canonicalize_fmul_bfloat: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: pushq %rax +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: vpextrw $0, %xmm0, %eax +; AVX2-NEXT: vpextrw $0, %xmm1, %ecx +; AVX2-NEXT: shll $16, %ecx +; AVX2-NEXT: vmovd %ecx, %xmm1 +; AVX2-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill +; AVX2-NEXT: shll $16, %eax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: callq 
__truncsfbf2@PLT +; AVX2-NEXT: vpextrw $0, %xmm0, %eax +; AVX2-NEXT: shll $16, %eax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfbf2@PLT +; AVX2-NEXT: vpextrw $0, %xmm0, %eax +; AVX2-NEXT: shll $16, %eax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfbf2@PLT +; AVX2-NEXT: vpextrw $0, %xmm0, %eax +; AVX2-NEXT: shll $16, %eax +; AVX2-NEXT: vmovd %eax, %xmm0 +; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfbf2@PLT +; AVX2-NEXT: popq %rax +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: complex_canonicalize_fmul_bfloat: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: pushq %rax +; AVX512F-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-NEXT: vpextrw $0, %xmm0, %eax +; AVX512F-NEXT: vpextrw $0, %xmm1, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm1 +; AVX512F-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: vmovd %eax, %xmm0 +; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: callq __truncsfbf2@PLT +; AVX512F-NEXT: vpextrw $0, %xmm0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: vmovd %eax, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX512F-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX512F-NEXT: callq __truncsfbf2@PLT +; AVX512F-NEXT: vpextrw $0, %xmm0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: vmovd %eax, %xmm0 +; AVX512F-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX512F-NEXT: callq __truncsfbf2@PLT +; AVX512F-NEXT: vpextrw $0, %xmm0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: vmovd %eax, %xmm0 +; AVX512F-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded 
Reload +; AVX512F-NEXT: callq __truncsfbf2@PLT +; AVX512F-NEXT: popq %rax +; AVX512F-NEXT: .cfi_def_cfa_offset 8 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: complex_canonicalize_fmul_bfloat: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax +; AVX512BW-NEXT: vpextrw $0, %xmm1, %ecx +; AVX512BW-NEXT: shll $16, %ecx +; AVX512BW-NEXT: vmovd %ecx, %xmm1 +; AVX512BW-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill +; AVX512BW-NEXT: shll $16, %eax +; AVX512BW-NEXT: vmovd %eax, %xmm0 +; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: callq __truncsfbf2@PLT +; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax +; AVX512BW-NEXT: shll $16, %eax +; AVX512BW-NEXT: vmovd %eax, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX512BW-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX512BW-NEXT: callq __truncsfbf2@PLT +; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax +; AVX512BW-NEXT: shll $16, %eax +; AVX512BW-NEXT: vmovd %eax, %xmm0 +; AVX512BW-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX512BW-NEXT: callq __truncsfbf2@PLT +; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax +; AVX512BW-NEXT: shll $16, %eax +; AVX512BW-NEXT: vmovd %eax, %xmm0 +; AVX512BW-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX512BW-NEXT: callq __truncsfbf2@PLT +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 8 +; AVX512BW-NEXT: retq entry: %sub1 = fsub bfloat %a, %b @@ -85,14 +556,60 @@ entry: } define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { +; SSE-LABEL: canonicalize_fp64: +; SSE: # %bb.0: # %start +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE-NEXT: movapd %xmm2, %xmm3 +; SSE-NEXT: andpd %xmm1, %xmm3 +; SSE-NEXT: maxsd %xmm0, %xmm1 +; SSE-NEXT: andnpd %xmm1, %xmm2 +; SSE-NEXT: orpd %xmm3, %xmm2 +; SSE-NEXT: movapd %xmm2, %xmm0 +; SSE-NEXT: retq +; +; 
SSE2-LABEL: canonicalize_fp64: +; SSE2: # %bb.0: # %start +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm3 +; SSE2-NEXT: andpd %xmm1, %xmm3 +; SSE2-NEXT: maxsd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm1, %xmm2 +; SSE2-NEXT: orpd %xmm3, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: canonicalize_fp64: +; AVX: # %bb.0: # %start +; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: retq +; +; AVX2-LABEL: canonicalize_fp64: +; AVX2: # %bb.0: # %start +; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canonicalize_fp64: +; AVX512F: # %bb.0: # %start +; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} +; AVX512F-NEXT: vmovapd %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canonicalize_fp64: +; AVX512BW: # %bb.0: # %start +; AVX512BW-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vmovapd %xmm2, %xmm0 +; AVX512BW-NEXT: retq start: - ; CHECK-LABEL: canonicalize_fp64: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 - ; CHECK-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 - ; CHECK-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} - ; CHECK-NEXT: vmovapd %xmm2, %xmm0 - ; CHECK-NEXT: retq %c = fcmp olt double %a, %b %d = fcmp uno double %a, 0.000000e+00 @@ -103,14 +620,60 @@ start: } define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { +; SSE-LABEL: canonicalize_fp32: +; SSE: # %bb.0: # %start +; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: cmpunordss %xmm0, %xmm2 +; SSE-NEXT: movaps %xmm2, %xmm3 +; SSE-NEXT: andps %xmm1, %xmm3 +; SSE-NEXT: maxss %xmm0, %xmm1 +; 
SSE-NEXT: andnps %xmm1, %xmm2 +; SSE-NEXT: orps %xmm3, %xmm2 +; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: retq +; +; SSE2-LABEL: canonicalize_fp32: +; SSE2: # %bb.0: # %start +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: cmpunordss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm1, %xmm2 +; SSE2-NEXT: orps %xmm3, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: canonicalize_fp32: +; AVX: # %bb.0: # %start +; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: retq +; +; AVX2-LABEL: canonicalize_fp32: +; AVX2: # %bb.0: # %start +; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canonicalize_fp32: +; AVX512F: # %bb.0: # %start +; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512F-NEXT: vmovaps %xmm2, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: canonicalize_fp32: +; AVX512BW: # %bb.0: # %start +; AVX512BW-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 +; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} +; AVX512BW-NEXT: vmovaps %xmm2, %xmm0 +; AVX512BW-NEXT: retq start: - ; CHECK-LABEL: canonicalize_fp32: - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmaxss %xmm0, %xmm1, %xmm2 - ; CHECK-NEXT: vcmpunordss %xmm0, %xmm0, %k1 - ; CHECK-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} - ; CHECK-NEXT: vmovaps %xmm2, %xmm0 - ; CHECK-NEXT: retq %cc = fcmp olt float %aa, %bb %dd = fcmp uno float %aa, 0.000000e+00 @@ -121,11 +684,47 @@ start: } define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { - ; CHECK-LAEBL: v_test_canonicalize_var_f32 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovss (%rdi), %xmm0 - ; 
CHECK-NEXT: vmovss %xmm0, (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize_var_f32: +; SSE: # %bb.0: +; SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; SSE-NEXT: mulss (%rdi), %xmm0 +; SSE-NEXT: movss %xmm0, (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_var_f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: mulss (%rdi), %xmm0 +; SSE2-NEXT: movss %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_var_f32: +; AVX: # %bb.0: +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovss %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_var_f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX2-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vmovss %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_var_f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX512F-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vmovss %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_var_f32: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX512BW-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; AVX512BW-NEXT: vmovss %xmm0, (%rdi) +; AVX512BW-NEXT: retq %val = load float, float addrspace(1)* %out %canonicalized = call float @llvm.canonicalize.f32(float %val) store float %canonicalized, float addrspace(1)* %out @@ -133,11 +732,53 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { } define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { - ; CHECK-LAEBL: v_test_canonicalize_x86_fp80 - ; CHECK: # %bb.0: - ; CHECK-NEXT: fldt (%rdi) - ; CHECK-NEXT: fstpt (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize_x86_fp80: +; SSE: # %bb.0: +; SSE-NEXT: fldt (%rdi) +; 
SSE-NEXT: fld1 +; SSE-NEXT: fmulp %st, %st(1) +; SSE-NEXT: fstpt (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_x86_fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: fldt (%rdi) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmulp %st, %st(1) +; SSE2-NEXT: fstpt (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_x86_fp80: +; AVX: # %bb.0: +; AVX-NEXT: fldt (%rdi) +; AVX-NEXT: fld1 +; AVX-NEXT: fmulp %st, %st(1) +; AVX-NEXT: fstpt (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_x86_fp80: +; AVX2: # %bb.0: +; AVX2-NEXT: fldt (%rdi) +; AVX2-NEXT: fld1 +; AVX2-NEXT: fmulp %st, %st(1) +; AVX2-NEXT: fstpt (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_x86_fp80: +; AVX512F: # %bb.0: +; AVX512F-NEXT: fldt (%rdi) +; AVX512F-NEXT: fld1 +; AVX512F-NEXT: fmulp %st, %st(1) +; AVX512F-NEXT: fstpt (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_x86_fp80: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: fldt (%rdi) +; AVX512BW-NEXT: fld1 +; AVX512BW-NEXT: fmulp %st, %st(1) +; AVX512BW-NEXT: fstpt (%rdi) +; AVX512BW-NEXT: retq %val = load x86_fp80, x86_fp80 addrspace(1)* %out %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out @@ -145,11 +786,127 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { } define void @v_test_canonicalize__half(half addrspace(1)* %out) { -; CHECK-LABEL: v_test_canonicalize__half: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmovsh (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vmovsh %xmm0, (%rdi) -; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize__half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rbx +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: subq $16, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 32 +; SSE-NEXT: .cfi_offset %rbx, -16 +; SSE-NEXT: movq %rdi, %rbx +; SSE-NEXT: pinsrw $0, (%rdi), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movd %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: movw %ax, (%rbx) +; SSE-NEXT: addq $16, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: popq %rbx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize__half: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: .cfi_def_cfa_offset 16 +; SSE2-NEXT: subq $16, %rsp +; SSE2-NEXT: .cfi_def_cfa_offset 32 +; SSE2-NEXT: .cfi_offset %rbx, -16 +; SSE2-NEXT: movq %rdi, %rbx +; SSE2-NEXT: pinsrw $0, (%rdi), %xmm0 +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: pextrw $0, %xmm0, %eax +; SSE2-NEXT: movw %ax, (%rbx) +; SSE2-NEXT: addq $16, %rsp +; SSE2-NEXT: .cfi_def_cfa_offset 16 +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: .cfi_def_cfa_offset 8 +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize__half: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: subq $16, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 32 +; AVX-NEXT: .cfi_offset %rbx, -16 +; AVX-NEXT: movq %rdi, %rbx +; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX-NEXT: addq 
$16, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: popq %rbx +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize__half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: subq $16, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 32 +; AVX2-NEXT: .cfi_offset %rbx, -16 +; AVX2-NEXT: movq %rdi, %rbx +; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX2-NEXT: addq $16, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize__half: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: movzwl (%rdi), %eax +; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512F-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: movw %ax, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize__half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: movzwl (%rdi), %eax +; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX512BW-NEXT: vmovd %ecx, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512BW-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; 
AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: movw %ax, (%rdi) +; AVX512BW-NEXT: retq entry: %val = load half, half addrspace(1)* %out %canonicalized = call half @llvm.canonicalize.f16(half %val) @@ -158,11 +915,47 @@ entry: } define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { - ; CHECK-LAEBL: v_test_canonicalize_var_f64 - ; CHECK: # %bb.0: - ; CHECK-NEXT: vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero - ; CHECK-NEXT: vmovsd %xmm0, (%rdi) - ; CHECK-NEXT: retq +; SSE-LABEL: v_test_canonicalize_var_f64: +; SSE: # %bb.0: +; SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; SSE-NEXT: mulsd (%rdi), %xmm0 +; SSE-NEXT: movsd %xmm0, (%rdi) +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize_var_f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; SSE2-NEXT: mulsd (%rdi), %xmm0 +; SSE2-NEXT: movsd %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize_var_f64: +; AVX: # %bb.0: +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovsd %xmm0, (%rdi) +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_var_f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; AVX2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vmovsd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_var_f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; AVX512F-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vmovsd %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_var_f64: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; AVX512BW-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; AVX512BW-NEXT: vmovsd %xmm0, (%rdi) +; AVX512BW-NEXT: retq %val = load double, double addrspace(1)* %out %canonicalized = call double @llvm.canonicalize.f64(double %val) 
store double %canonicalized, double addrspace(1)* %out @@ -170,15 +963,29 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { } define void @v_test_canonicalize__bfloat(bfloat addrspace(1)* %out) { -; CHECK-LABEL: v_test_canonicalize__bfloat: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzwl (%rdi), %eax -; CHECK-NEXT: shll $16, %eax -; CHECK-NEXT: vmovd %eax, %xmm0 -; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 -; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi) -; CHECK-NEXT: retq - +; SSE-LABEL: v_test_canonicalize__bfloat: +; SSE: # %bb.0: # %entry +; SSE-NEXT: retq +; +; SSE2-LABEL: v_test_canonicalize__bfloat: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: retq +; +; AVX-LABEL: v_test_canonicalize__bfloat: +; AVX: # %bb.0: # %entry +; AVX-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize__bfloat: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize__bfloat: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize__bfloat: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: retq entry: %val = load bfloat, bfloat addrspace(1)* %out %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val) @@ -190,4 +997,4 @@ declare double @llvm.canonicalize.f64(double) declare float @llvm.canonicalize.f32(float) declare bfloat @llvm.canonicalize.bf16(bfloat) declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) -declare half @llvm.canonicalize.f16(half) \ No newline at end of file +declare half @llvm.canonicalize.f16(half) From d40523083c236995400c5d44444fdfdd20560d71 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Fri, 6 Sep 2024 10:49:32 +0200 Subject: [PATCH 03/10] addressing review comments, simplify conditions --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4b9f0326e4d46..c9227be5d6c29 100644 --- 
a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58077,14 +58077,12 @@ SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) { DAG.getMachineFunction().getDenormalMode(C.getSemantics()); assert((Mode != DenormalMode::getPositiveZero()) && "Positive denormal mode is not valid for X86 target."); - if (Mode == DenormalMode::getPreserveSign()) { - SDValue SDZero = - DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT); - return SDZero; - } else if (Mode == DenormalMode::getIEEE()) { + if (Mode == DenormalMode::getPreserveSign()) + return DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT); + if (Mode == DenormalMode::getIEEE() || Mode == DenormalMode::getDynamic()) return Operand; - } - } else if (C.isNaN() && C.isSignaling()) { + } + if (C.isNaN() && C.isSignaling()) { APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics()); SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); return QuitNaN; @@ -58094,7 +58092,7 @@ SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) { } SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) { - assert(N!=nullptr && "Trying to find last chain for a NULL Node"); + assert(N != nullptr && "Trying to find last chain for a NULL Node"); for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); if (Op.getValueType() == MVT::Other && Op.getNode()->isStrictFPOpcode()) @@ -58104,10 +58102,10 @@ SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) { } bool isNonCanonicalizingOperation(SDNode *N) { - assert(N!=nullptr && "Trying to check canonical opcode for a NULL Node"); + assert(N != nullptr && "Trying to check canonical opcode for a NULL Node"); unsigned Opc = N->getOpcode(); switch (Opc) { - // Ensure these are the exasustive set of non canonicalizing opcodes. Add more + // Ensure these are the exhaustive set of non canonicalizing opcodes. Add more // if not. 
case X86::RET: case ISD::STORE: From 317dd6f68da03de47fe525fbc5453494ec16c059 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Tue, 10 Sep 2024 15:10:50 +0200 Subject: [PATCH 04/10] Removed constant folding for another patch, moving undef canonicalize to generic dag combiner --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 + llvm/lib/Target/X86/X86ISelLowering.cpp | 94 +- .../CodeGen/X86/canonicalize-constants.ll | 583 ----- .../CodeGen/X86/canonicalize-subnormals.ll | 1888 ----------------- llvm/test/CodeGen/X86/canonicalize-vars.ll | 441 ++-- 5 files changed, 164 insertions(+), 2852 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/canonicalize-constants.ll delete mode 100644 llvm/test/CodeGen/X86/canonicalize-subnormals.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 37272a09b336a..ef989ea319027 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1980,6 +1980,16 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FREEZE: return visitFREEZE(N); case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N); case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N); + case ISD::FCANONICALIZE:{ + SDValue Operand = N->getOperand(0); + EVT VT = Operand.getValueType(); + SDLoc dl(N); + if(Operand.isUndef()){ + APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); + return DAG.getConstantFP(CanonicalQNaN, dl, VT); + } + break; + } case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c9227be5d6c29..8dcb52ac611d6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58066,81 +58066,11 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, return SDValue(); } -SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) { - SDValue 
Operand = Node->getOperand(0); - SDLoc dl(Node); - EVT VT = Operand.getValueType(); - if (ConstantFPSDNode *CFP = dyn_cast(Operand)) { - const APFloat &C = CFP->getValueAPF(); - if (C.isDenormal()) { - DenormalMode Mode = - DAG.getMachineFunction().getDenormalMode(C.getSemantics()); - assert((Mode != DenormalMode::getPositiveZero()) && - "Positive denormal mode is not valid for X86 target."); - if (Mode == DenormalMode::getPreserveSign()) - return DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT); - if (Mode == DenormalMode::getIEEE() || Mode == DenormalMode::getDynamic()) - return Operand; - } - if (C.isNaN() && C.isSignaling()) { - APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics()); - SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); - return QuitNaN; - } - } - return Operand; -} - -SDValue findLastStrictOpChain(SDNode *N, SelectionDAG &DAG) { - assert(N != nullptr && "Trying to find last chain for a NULL Node"); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - SDValue Op = N->getOperand(i); - if (Op.getValueType() == MVT::Other && Op.getNode()->isStrictFPOpcode()) - return Op; - } - return DAG.getEntryNode(); -} - -bool isNonCanonicalizingOperation(SDNode *N) { - assert(N != nullptr && "Trying to check canonical opcode for a NULL Node"); - unsigned Opc = N->getOpcode(); - switch (Opc) { - // Ensure these are the exhaustive set of non canonicalizing opcodes. Add more - // if not. 
- case X86::RET: - case ISD::STORE: - case ISD::SETCC: - case X86ISD::FCMP: - return true; - default: - return false; - } -} - -bool isUsedByNonCanonicalizingOp(SDNode *N) { - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; - ++UI) { - SDNode *User = *UI; - if (isNonCanonicalizingOperation(User)) - return true; - } - return false; -} - SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) { SDValue Operand = Node->getOperand(0); EVT VT = Operand.getValueType(); SDLoc dl(Node); - if (auto *CFP = dyn_cast(Operand)) - return combineConstantCanonicalize(Node, DAG); - - if (Operand.isUndef()) { - APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); - SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT); - return QuitNaN; - } - // Canonicalize scalar variable FP Nodes. SDValue MulNode; SDValue One; @@ -58157,28 +58087,18 @@ SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) { One = DAG.getConstantFP(Val, dl, VT); } else { // Is it better to assert? when we encounter an unknown FP type,Than to - // just replace with the operand! As this might be our last attempt at - // legalization. + // just replace with the operand! return Operand; } - // Store, return, and compare are non-canonicalizing operations. If a - // non-canonicalizing operation uses the rest then mul * 1.0 must be generated - // int those cases. - // TODO: For now Preventing bf16 from generating strict_fmul as it - // leads to a crash SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0, + // TODO: Fix Crash for bf16 when generating strict_fmul as it + // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0, // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft // promote this operator's result! - if (isUsedByNonCanonicalizingOp(Node) && VT != MVT::bf16) { - SDValue Chain = findLastStrictOpChain(Node, DAG); - // TODO : Follow-up with tablegen pattern to generate mul * 1.0. 
- SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, - {Chain, One, Operand}); - - return StrictFmul; - } - - return Operand; + SDValue Chain = DAG.getEntryNode(); + SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, + {Chain, One, Operand}); + return StrictFmul; // TODO : Hanlde vectors. } diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll deleted file mode 100644 index b1a9733806d40..0000000000000 --- a/llvm/test/CodeGen/X86/canonicalize-constants.ll +++ /dev/null @@ -1,583 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 -; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE -; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2 -; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX -; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2 -; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW - -define float @canon_fp32() { -; SSE-LABEL: canon_fp32: -; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] -; SSE-NEXT: retq -; -; SSE2-LABEL: canon_fp32: -; SSE2: # %bb.0: -; SSE2-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] -; SSE2-NEXT: retq -; -; AVX-LABEL: canon_fp32: -; AVX: # %bb.0: -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: 
vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX512BW-NEXT: retq - %canonicalized = call float @llvm.canonicalize.f32(float 3.0) - ret float %canonicalized -} - -define half @canon_fp16() { -; SSE-LABEL: canon_fp16: -; SSE: # %bb.0: -; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: retq -; -; SSE2-LABEL: canon_fp16: -; SSE2: # %bb.0: -; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: retq -; -; AVX-LABEL: canon_fp16: -; AVX: # %bb.0: -; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: retq - %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0 - ret half %canonicalized -} - -define double @canon_fp64() { -; SSE-LABEL: canon_fp64: -; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; SSE-NEXT: retq -; -; SSE2-LABEL: canon_fp64: -; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; SSE2-NEXT: retq -; -; AVX-LABEL: canon_fp64: -; AVX: # %bb.0: -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp64: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp64: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp64: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double 3.0) - ret double %canonicalized 
-} - -define x86_fp80 @canon_fp80() { -; SSE-LABEL: canon_fp80: -; SSE: # %bb.0: -; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE-NEXT: retq -; -; SSE2-LABEL: canon_fp80: -; SSE2: # %bb.0: -; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE2-NEXT: retq -; -; AVX-LABEL: canon_fp80: -; AVX: # %bb.0: -; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp80: -; AVX2: # %bb.0: -; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp80: -; AVX512F: # %bb.0: -; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp80: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512BW-NEXT: retq - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0 - ret x86_fp80 %canonicalized -} - - -define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) { -; SSE-LABEL: complex_canonicalize_x86_fp80: -; SSE: # %bb.0: # %entry -; SSE-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE-NEXT: retq -; -; SSE2-LABEL: complex_canonicalize_x86_fp80: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE2-NEXT: retq -; -; AVX-LABEL: complex_canonicalize_x86_fp80: -; AVX: # %bb.0: # %entry -; AVX-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX-NEXT: retq -; -; AVX2-LABEL: complex_canonicalize_x86_fp80: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX2-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: complex_canonicalize_x86_fp80: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512F-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: complex_canonicalize_x86_fp80: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: fldt 
{{[0-9]+}}(%rsp) -; AVX512BW-NEXT: fsubrs {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512BW-NEXT: retq -entry: - %mul1 = fsub x86_fp80 %a, %b - %add = fadd x86_fp80 %mul1, %b - %mul2 = fsub x86_fp80 %add, %mul1 - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) - %result = fsub x86_fp80 %canonicalized, %b - ret x86_fp80 %result -} - -define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 { -; SSE-LABEL: complex_canonicalize_fp64: -; SSE: # %bb.0: # %start -; SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; SSE-NEXT: retq -; -; SSE2-LABEL: complex_canonicalize_fp64: -; SSE2: # %bb.0: # %start -; SSE2-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; SSE2-NEXT: retq -; -; AVX-LABEL: complex_canonicalize_fp64: -; AVX: # %bb.0: # %start -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX-NEXT: retq -; -; AVX2-LABEL: complex_canonicalize_fp64: -; AVX2: # %bb.0: # %start -; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX2-NEXT: retq -; -; AVX512F-LABEL: complex_canonicalize_fp64: -; AVX512F: # %bb.0: # %start -; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: complex_canonicalize_fp64: -; AVX512BW: # %bb.0: # %start -; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] -; AVX512BW-NEXT: retq -start: - %c = fcmp olt double %a, %b - %d = fcmp uno double %a, 0.000000e+00 - %or.cond.i.i = or i1 %d, %c - %e = select i1 %or.cond.i.i, double %b, double %a - %f = tail call double @llvm.canonicalize.f64(double 3.0) #2 - ret double %f -} - -define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 { -; SSE-LABEL: test_fold_canonicalize_p0_f32: -; SSE: # %bb.0: -; SSE-NEXT: movl $0, (%rdi) -; SSE-NEXT: retq -; -; SSE2-LABEL: test_fold_canonicalize_p0_f32: -; SSE2: # %bb.0: -; SSE2-NEXT: movl $0, (%rdi) -; SSE2-NEXT: retq -; -; AVX-LABEL: test_fold_canonicalize_p0_f32: -; AVX: # %bb.0: -; AVX-NEXT: movl $0, (%rdi) -; AVX-NEXT: retq -; -; 
AVX2-LABEL: test_fold_canonicalize_p0_f32: -; AVX2: # %bb.0: -; AVX2-NEXT: movl $0, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: test_fold_canonicalize_p0_f32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $0, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: test_fold_canonicalize_p0_f32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: movl $0, (%rdi) -; AVX512BW-NEXT: retq - %canonicalized = call float @llvm.canonicalize.f32(float 0.0) - store float %canonicalized, float addrspace(1)* %out - ret void -} - -define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 { -; SSE-LABEL: test_fold_canonicalize_n0_f32: -; SSE: # %bb.0: -; SSE-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; SSE-NEXT: retq -; -; SSE2-LABEL: test_fold_canonicalize_n0_f32: -; SSE2: # %bb.0: -; SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; SSE2-NEXT: retq -; -; AVX-LABEL: test_fold_canonicalize_n0_f32: -; AVX: # %bb.0: -; AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; AVX-NEXT: retq -; -; AVX2-LABEL: test_fold_canonicalize_n0_f32: -; AVX2: # %bb.0: -; AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: test_fold_canonicalize_n0_f32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: test_fold_canonicalize_n0_f32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; AVX512BW-NEXT: retq - %canonicalized = call float @llvm.canonicalize.f32(float -0.0) - store float %canonicalized, float addrspace(1)* %out - ret void -} - - -define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { -; SSE-LABEL: v_test_canonicalize_p90_x86_fp80: -; SSE: # %bb.0: -; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE-NEXT: fstpt (%rdi) -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_p90_x86_fp80: -; SSE2: # %bb.0: -; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE2-NEXT: fstpt (%rdi) -; 
SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_p90_x86_fp80: -; AVX: # %bb.0: -; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX-NEXT: fstpt (%rdi) -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_p90_x86_fp80: -; AVX2: # %bb.0: -; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX2-NEXT: fstpt (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_p90_x86_fp80: -; AVX512F: # %bb.0: -; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512F-NEXT: fstpt (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_p90_x86_fp80: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512BW-NEXT: fstpt (%rdi) -; AVX512BW-NEXT: retq - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) - store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out - ret void -} - -define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) { -; SSE-LABEL: v_test_canonicalize_p3__half: -; SSE: # %bb.0: # %entry -; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: pextrw $0, %xmm0, %eax -; SSE-NEXT: movw %ax, (%rdi) -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_p3__half: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: movw %ax, (%rdi) -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_p3__half: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_p3__half: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_p3__half: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; 
AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_p3__half: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX512BW-NEXT: retq -entry: - %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) - store half %canonicalized, half addrspace(1)* %out - ret void -} - -define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 { -; SSE-LABEL: v_test_canonicalize_p3_f64: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 -; SSE-NEXT: movq %rax, (%rdi) -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_p3_f64: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 -; SSE2-NEXT: movq %rax, (%rdi) -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_p3_f64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 -; AVX-NEXT: movq %rax, (%rdi) -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_p3_f64: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 -; AVX2-NEXT: movq %rax, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_p3_f64: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 -; AVX512F-NEXT: movq %rax, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_p3_f64: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000 -; AVX512BW-NEXT: movq %rax, (%rdi) -; AVX512BW-NEXT: retq -entry: - %canonicalized = call double @llvm.canonicalize.f64(double 3.0) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) { -; SSE-LABEL: v_test_canonicalize_p3__bfloat: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movw 
$16448, (%rdi) # imm = 0x4040 -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_p3__bfloat: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movw $16448, (%rdi) # imm = 0x4040 -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_p3__bfloat: -; AVX: # %bb.0: # %entry -; AVX-NEXT: movw $16448, (%rdi) # imm = 0x4040 -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_p3__bfloat: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movw $16448, (%rdi) # imm = 0x4040 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_p3__bfloat: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movw $16448, (%rdi) # imm = 0x4040 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_p3__bfloat: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movw $16448, (%rdi) # imm = 0x4040 -; AVX512BW-NEXT: retq -entry: - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0) - store bfloat %canonicalized, bfloat addrspace(1)* %out - ret void -} - -define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) { -; SSE-LABEL: v_test_canonicalize_n3__bfloat: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movw $-16320, (%rdi) # imm = 0xC040 -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_n3__bfloat: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movw $-16320, (%rdi) # imm = 0xC040 -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_n3__bfloat: -; AVX: # %bb.0: # %entry -; AVX-NEXT: movw $-16320, (%rdi) # imm = 0xC040 -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_n3__bfloat: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movw $-16320, (%rdi) # imm = 0xC040 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_n3__bfloat: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movw $-16320, (%rdi) # imm = 0xC040 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_n3__bfloat: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movw $-16320, (%rdi) # imm = 0xC040 -; AVX512BW-NEXT: retq -entry: - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0) - store 
bfloat %canonicalized, bfloat addrspace(1)* %out - ret void -} - -define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { -; SSE-LABEL: v_test_canonicalize_n90_x86_fp80: -; SSE: # %bb.0: -; SSE-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE-NEXT: fstpt (%rdi) -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_n90_x86_fp80: -; SSE2: # %bb.0: -; SSE2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; SSE2-NEXT: fstpt (%rdi) -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_n90_x86_fp80: -; AVX: # %bb.0: -; AVX-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX-NEXT: fstpt (%rdi) -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_n90_x86_fp80: -; AVX2: # %bb.0: -; AVX2-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX2-NEXT: fstpt (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_n90_x86_fp80: -; AVX512F: # %bb.0: -; AVX512F-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512F-NEXT: fstpt (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_n90_x86_fp80: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; AVX512BW-NEXT: fstpt (%rdi) -; AVX512BW-NEXT: retq - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000) - store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out - ret void -} - -define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) { -; SSE-LABEL: v_test_canonicalize_n3__half: -; SSE: # %bb.0: # %entry -; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: pextrw $0, %xmm0, %eax -; SSE-NEXT: movw %ax, (%rdi) -; SSE-NEXT: retq -; -; SSE2-LABEL: v_test_canonicalize_n3__half: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: movw %ax, (%rdi) -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize_n3__half: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: 
vpextrw $0, %xmm0, (%rdi) -; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_n3__half: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_n3__half: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_n3__half: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; AVX512BW-NEXT: retq -entry: - %canonicalized = call half @llvm.canonicalize.f16(half 0xHC200) - store half %canonicalized, half addrspace(1)* %out - ret void -} diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll deleted file mode 100644 index 034da96271eb8..0000000000000 --- a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll +++ /dev/null @@ -1,1888 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 -; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-SSE2 %s -; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-SSE2 %s -; RUN: llc -mattr=sse2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-SSE2 %s -; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX %s -; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX %s -; RUN: llc -mattr=+avx -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX %s -; RUN: llc -mattr=+avx2 -mtriple=x86_64 
-denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX2 %s -; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX2 %s -; RUN: llc -mattr=+avx2 -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX2 %s -; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512F %s -; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512F %s -; RUN: llc -mattr=+avx512f -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512F %s -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=PRE-SIGN-AVX512BW %s -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL-AVX512BW %s -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=DYN-DENORMAL-AVX512BW %s - -define double @test_bad_subnormal() { -; PRE-SIGN-SSE2-LABEL: test_bad_subnormal: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: test_bad_subnormal: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: test_bad_subnormal: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: test_bad_subnormal: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: test_bad_subnormal: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: 
test_bad_subnormal: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: test_bad_subnormal: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: test_bad_subnormal: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: test_bad_subnormal: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: test_bad_subnormal: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: test_bad_subnormal: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: test_bad_subnormal: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: test_bad_subnormal: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: test_bad_subnormal: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: test_bad_subnormal: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [NaN,0.0E+0] -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canon = call double @llvm.canonicalize(double 0x7ff8000000000001) ; Nan - ret double %canon -} - -define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: 
canonicalize_denormal1_f32_pre_sign: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_pre_sign: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_pre_sign: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_pre_sign: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_pre_sign: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: 
canonicalize_denormal1_f32_pre_sign: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_pre_sign: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_pre_sign: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) - store float %canonicalized, float addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_pre_sign: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; 
DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_pre_sign: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_pre_sign: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_pre_sign: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; 
IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_pre_sign: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_pre_sign: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) - store double %canonicalized, double addrspace(1)* %out - ret void -} - - -define void @canonicalize_qnan_f64(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_f64: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; 
IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_f64: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_qnan_f64: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_f64: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_f64: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_f64: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_f64: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_f64: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; 
IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_f64: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_f64: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_f64: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg1_f64: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movq $-1, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movq $-1, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg1_f64: -; DYN-DENORMAL-SSE2: # %bb.0: -; 
DYN-DENORMAL-SSE2-NEXT: movq $-1, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg1_f64: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movq $-1, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movq $-1, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg1_f64: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movq $-1, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg1_f64: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movq $-1, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movq $-1, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg1_f64: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movq $-1, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movq $-1, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg1_f64: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movq $-1, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movq $-1, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; 
DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg1_f64: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movq $-1, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double)) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_qnan_value_neg2_f64: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movq $-2, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movq $-2, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_qnan_value_neg2_f64: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movq $-2, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_qnan_value_neg2_f64: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movq $-2, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movq $-2, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_qnan_value_neg2_f64: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movq $-2, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_qnan_value_neg2_f64: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movq $-2, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movq $-2, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_qnan_value_neg2_f64: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movq $-2, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64: -; 
PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movq $-2, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_qnan_value_neg2_f64: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movq $-2, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movq $-2, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_qnan_value_neg2_f64: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movq $-2, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double)) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_snan0_value_f64: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_snan0_value_f64: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; 
DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_snan0_value_f64: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_snan0_value_f64: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_snan0_value_f64: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_snan0_value_f64: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_snan0_value_f64: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; 
IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_snan0_value_f64: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_snan0_value_f64: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_snan0_value_f64: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double)) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_undef(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_undef: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_undef: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_undef: -; DYN-DENORMAL-SSE2: # %bb.0: -; 
DYN-DENORMAL-SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_undef: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_undef: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_undef: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_undef: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_undef: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_undef: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_undef: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_undef: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, 
%rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_undef: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_undef: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_undef: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_undef: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double undef) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_ieee: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_ieee: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: 
canonicalize_denormal1_f32_ieee: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_ieee: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_ieee: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_ieee: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_ieee: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_ieee: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: 
canonicalize_denormal1_f32_ieee: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_ieee: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) - store float %canonicalized, float addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_ieee: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_ieee: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_ieee: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_ieee: -; DYN-DENORMAL-AVX: # %bb.0: 
-; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_ieee: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_ieee: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_ieee: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_ieee: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) -; 
PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_ieee: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f32_dynamic: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f32_dynamic: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f32_dynamic: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f32_dynamic: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; 
DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f32_dynamic: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f32_dynamic: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f32_dynamic: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movl $-2147483648, (%rdi) # imm = 0x80000000 -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f32_dynamic: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movl $-2139095041, (%rdi) # imm = 0x807FFFFF -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float)) - store 
float %canonicalized, float addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_f64_dynamic: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-SSE2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_f64_dynamic: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-SSE2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_f64_dynamic: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_f64_dynamic: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_f64_dynamic: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX2-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic: -; 
IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_f64_dynamic: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX2-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX512F-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_f64_dynamic: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX512F-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; PRE-SIGN-AVX512BW-NEXT: movq %rax, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movabsq $-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; IEEE-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_f64_dynamic: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movabsq 
$-9218868437227405313, %rax # imm = 0x800FFFFFFFFFFFFF -; DYN-DENORMAL-AVX512BW-NEXT: movq %rax, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double)) - store double %canonicalized, double addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; 
DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_pre_sign: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) - store bfloat %canonicalized, bfloat addrspace(1)* %out - ret void -} - - -define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; 
DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_ieee: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_ieee: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_ieee: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_ieee: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_ieee: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee: -; 
PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_ieee: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) - store bfloat %canonicalized, bfloat addrspace(1)* %out - ret void -} - - -define void @canonicalize_denormal1_bfloat_dynamic(bfloat addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_bfloat_dynamic: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_bfloat_dynamic: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic: -; 
PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_bfloat_dynamic: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_bfloat_dynamic: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_bfloat_dynamic: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: movw $-32768, (%rdi) # imm = 0x8000 -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat)) - store bfloat %canonicalized, bfloat addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_half_pre_sign(half addrspace(1)* %out) { -; 
PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_pre_sign: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax -; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax -; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_pre_sign: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax -; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_pre_sign: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_pre_sign: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_pre_sign: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign: -; 
IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_pre_sign: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_pre_sign: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_pre_sign: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; 
DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) - store half %canonicalized, half addrspace(1)* %out - ret void -} - - -define void @canonicalize_denormal1_half_ieee(half addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_ieee: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax -; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax -; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_ieee: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax -; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_ieee: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_ieee: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX-NEXT: vpextrw 
$0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_ieee: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_ieee: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_ieee: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_ieee: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_ieee: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee: -; 
IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_ieee: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) - store half %canonicalized, half addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_half_dynamic(half addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_half_dynamic: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; PRE-SIGN-SSE2-NEXT: pextrw $0, %xmm0, %eax -; PRE-SIGN-SSE2-NEXT: movw %ax, (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; IEEE-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax -; IEEE-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_half_dynamic: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; DYN-DENORMAL-SSE2-NEXT: pextrw $0, %xmm0, %eax -; DYN-DENORMAL-SSE2-NEXT: movw %ax, (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_half_dynamic: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: vpinsrw $0, 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_half_dynamic: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_half_dynamic: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_half_dynamic: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX2-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_half_dynamic: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_half_dynamic: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) -; 
DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; PRE-SIGN-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; IEEE-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_half_dynamic: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; DYN-DENORMAL-AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call half @llvm.canonicalize.f16(half bitcast (i16 32768 to half)) - store half %canonicalized, half addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_x86_fp80_pre_sign(x86_fp80 addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: fldz -; PRE-SIGN-SSE2-NEXT: fstpt (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: fldz -; PRE-SIGN-AVX-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; 
IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: fldz -; PRE-SIGN-AVX2-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: fldz -; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: fldz -; 
PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_pre_sign: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) - store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_x86_fp80_dynamic(x86_fp80 addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: fldz -; PRE-SIGN-SSE2-NEXT: fstpt (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: fldz -; PRE-SIGN-AVX-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_dynamic: 
-; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: fldz -; PRE-SIGN-AVX2-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: fldz -; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: fldz -; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) -; 
IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_dynamic: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK00000000000000000001) - store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out - ret void -} - -define void @canonicalize_denormal1_x86_fp80_ieee(x86_fp80 addrspace(1)* %out) { -; PRE-SIGN-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; PRE-SIGN-SSE2: # %bb.0: -; PRE-SIGN-SSE2-NEXT: fldz -; PRE-SIGN-SSE2-NEXT: fstpt (%rdi) -; PRE-SIGN-SSE2-NEXT: retq -; -; IEEE-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; IEEE-DENORMAL-SSE2: # %bb.0: -; IEEE-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-SSE2-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-SSE2-NEXT: retq -; -; DYN-DENORMAL-SSE2-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; DYN-DENORMAL-SSE2: # %bb.0: -; DYN-DENORMAL-SSE2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-SSE2-NEXT: fstpt (%rdi) -; DYN-DENORMAL-SSE2-NEXT: retq -; -; PRE-SIGN-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; PRE-SIGN-AVX: # %bb.0: -; PRE-SIGN-AVX-NEXT: fldz -; PRE-SIGN-AVX-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX-NEXT: retq -; -; IEEE-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; IEEE-DENORMAL-AVX: # %bb.0: -; IEEE-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX-NEXT: retq -; -; DYN-DENORMAL-AVX-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; DYN-DENORMAL-AVX: # %bb.0: -; DYN-DENORMAL-AVX-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX-NEXT: retq -; -; PRE-SIGN-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; PRE-SIGN-AVX2: # %bb.0: -; PRE-SIGN-AVX2-NEXT: fldz -; PRE-SIGN-AVX2-NEXT: fstpt (%rdi) -; 
PRE-SIGN-AVX2-NEXT: retq -; -; IEEE-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; IEEE-DENORMAL-AVX2: # %bb.0: -; IEEE-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX2-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX2-NEXT: retq -; -; DYN-DENORMAL-AVX2-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; DYN-DENORMAL-AVX2: # %bb.0: -; DYN-DENORMAL-AVX2-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX2-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX2-NEXT: retq -; -; PRE-SIGN-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; PRE-SIGN-AVX512F: # %bb.0: -; PRE-SIGN-AVX512F-NEXT: fldz -; PRE-SIGN-AVX512F-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX512F-NEXT: retq -; -; IEEE-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; IEEE-DENORMAL-AVX512F: # %bb.0: -; IEEE-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX512F-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX512F-NEXT: retq -; -; DYN-DENORMAL-AVX512F-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; DYN-DENORMAL-AVX512F: # %bb.0: -; DYN-DENORMAL-AVX512F-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX512F-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX512F-NEXT: retq -; -; PRE-SIGN-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; PRE-SIGN-AVX512BW: # %bb.0: -; PRE-SIGN-AVX512BW-NEXT: fldz -; PRE-SIGN-AVX512BW-NEXT: fstpt (%rdi) -; PRE-SIGN-AVX512BW-NEXT: retq -; -; IEEE-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; IEEE-DENORMAL-AVX512BW: # %bb.0: -; IEEE-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; IEEE-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) -; IEEE-DENORMAL-AVX512BW-NEXT: retq -; -; DYN-DENORMAL-AVX512BW-LABEL: canonicalize_denormal1_x86_fp80_ieee: -; DYN-DENORMAL-AVX512BW: # %bb.0: -; DYN-DENORMAL-AVX512BW-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip) -; DYN-DENORMAL-AVX512BW-NEXT: fstpt (%rdi) -; DYN-DENORMAL-AVX512BW-NEXT: retq - %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 
0xK00000000000000000001) - store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out - ret void -} diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll index 0075386c02361..a956449632470 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -10,26 +10,32 @@ define float @canon_fp32_varargsf32(float %a) { ; SSE-LABEL: canon_fp32_varargsf32: ; SSE: # %bb.0: +; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; SSE2-LABEL: canon_fp32_varargsf32: ; SSE2: # %bb.0: +; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: canon_fp32_varargsf32: ; AVX: # %bb.0: +; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX2-LABEL: canon_fp32_varargsf32: ; AVX2: # %bb.0: +; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: canon_fp32_varargsf32: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: canon_fp32_varargsf32: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float %a) ret float %canonicalized @@ -39,90 +45,80 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { ; SSE-LABEL: canon_fp32_varargsf80: ; SSE: # %bb.0: ; SSE-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE-NEXT: fld1 +; SSE-NEXT: fmulp %st, %st(1) ; SSE-NEXT: retq ; ; SSE2-LABEL: canon_fp32_varargsf80: ; SSE2: # %bb.0: ; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmulp %st, %st(1) ; SSE2-NEXT: retq ; ; AVX-LABEL: canon_fp32_varargsf80: ; AVX: # %bb.0: ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX-NEXT: fld1 +; AVX-NEXT: fmulp %st, %st(1) ; AVX-NEXT: retq ; ; AVX2-LABEL: canon_fp32_varargsf80: ; AVX2: # %bb.0: ; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX2-NEXT: fld1 +; 
AVX2-NEXT: fmulp %st, %st(1) ; AVX2-NEXT: retq ; ; AVX512F-LABEL: canon_fp32_varargsf80: ; AVX512F: # %bb.0: ; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512F-NEXT: fld1 +; AVX512F-NEXT: fmulp %st, %st(1) ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: canon_fp32_varargsf80: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX512BW-NEXT: fld1 +; AVX512BW-NEXT: fmulp %st, %st(1) ; AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a) ret x86_fp80 %canonicalized } -define bfloat @canon_fp32_varargsbf16(bfloat %a) { -; SSE-LABEL: canon_fp32_varargsbf16: -; SSE: # %bb.0: -; SSE-NEXT: retq -; -; SSE2-LABEL: canon_fp32_varargsbf16: -; SSE2: # %bb.0: -; SSE2-NEXT: retq -; -; AVX-LABEL: canon_fp32_varargsbf16: -; AVX: # %bb.0: -; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp32_varargsbf16: -; AVX2: # %bb.0: -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp32_varargsbf16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp32_varargsbf16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: retq - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %a) - ret bfloat %canonicalized -} - define half @complex_canonicalize_fmul_half(half %a, half %b) { ; SSE-LABEL: complex_canonicalize_fmul_half: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %rax ; SSE-NEXT: .cfi_def_cfa_offset 16 -; SSE-NEXT: movss %xmm1, (%rsp) # 4-byte Spill +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE-NEXT: movss (%rsp), %xmm0 # 4-byte Reload +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movss 
(%rsp), %xmm1 # 4-byte Reload ; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: subss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: callq __truncsfhf2@PLT ; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload ; SSE-NEXT: callq __truncsfhf2@PLT ; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE-NEXT: callq __truncsfhf2@PLT ; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload ; SSE-NEXT: callq __truncsfhf2@PLT ; SSE-NEXT: popq %rax ; SSE-NEXT: .cfi_def_cfa_offset 8 @@ -132,27 +128,33 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: movss %xmm1, (%rsp) # 4-byte Spill +; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE2-NEXT: movss (%rsp), %xmm0 # 4-byte Reload +; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; SSE2-NEXT: movss 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload ; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: subss %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE2-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: callq __truncsfhf2@PLT +; SSE2-NEXT: callq __extendhfsf2@PLT +; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload ; SSE2-NEXT: callq __truncsfhf2@PLT ; SSE2-NEXT: popq %rax ; SSE2-NEXT: .cfi_def_cfa_offset 8 @@ -162,26 +164,32 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; AVX: # %bb.0: # %entry ; AVX-NEXT: pushq %rax ; AVX-NEXT: .cfi_def_cfa_offset 16 -; AVX-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill +; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload +; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; 
AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload ; AVX-NEXT: # xmm1 = mem[0],zero,zero,zero ; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX-NEXT: callq __truncsfhf2@PLT ; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX-NEXT: callq __truncsfhf2@PLT ; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX-NEXT: callq __truncsfhf2@PLT ; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX-NEXT: callq __truncsfhf2@PLT +; AVX-NEXT: callq __extendhfsf2@PLT +; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX-NEXT: callq __truncsfhf2@PLT ; AVX-NEXT: popq %rax ; AVX-NEXT: .cfi_def_cfa_offset 8 @@ -191,26 +199,32 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %rax ; AVX2-NEXT: .cfi_def_cfa_offset 16 -; AVX2-NEXT: vmovss %xmm1, (%rsp) # 4-byte Spill +; AVX2-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX2-NEXT: vmovss (%rsp), %xmm0 # 4-byte Reload +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; AVX2-NEXT: # xmm0 
= mem[0],zero,zero,zero ; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload ; AVX2-NEXT: # xmm1 = mem[0],zero,zero,zero ; AVX2-NEXT: vsubss %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: callq __truncsfhf2@PLT ; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX2-NEXT: callq __truncsfhf2@PLT ; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX2-NEXT: callq __truncsfhf2@PLT ; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX2-NEXT: callq __truncsfhf2@PLT ; AVX2-NEXT: popq %rax ; AVX2-NEXT: .cfi_def_cfa_offset 8 @@ -232,6 +246,15 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX512F-NEXT: vsubss %xmm0, %xmm2, %xmm0 ; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512F-NEXT: vmovd %eax, %xmm2 +; 
AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512F-NEXT: vmulss %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -255,6 +278,15 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 ; AVX512BW-NEXT: vsubss %xmm0, %xmm2, %xmm0 ; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512BW-NEXT: vmovd %eax, %xmm2 +; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512BW-NEXT: vmulss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -280,6 +312,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; SSE-NEXT: fld %st(0) ; SSE-NEXT: fadd %st(2), %st ; SSE-NEXT: fsubp %st, %st(1) +; SSE-NEXT: fld1 +; SSE-NEXT: fmulp %st, %st(1) ; SSE-NEXT: fsubp %st, %st(1) ; SSE-NEXT: retq ; @@ -291,6 +325,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; SSE2-NEXT: fld %st(0) ; SSE2-NEXT: fadd %st(2), %st ; SSE2-NEXT: fsubp %st, %st(1) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmulp %st, %st(1) ; SSE2-NEXT: fsubp %st, %st(1) ; SSE2-NEXT: retq ; @@ -302,6 +338,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; AVX-NEXT: fld %st(0) ; AVX-NEXT: fadd %st(2), %st ; AVX-NEXT: fsubp %st, %st(1) +; AVX-NEXT: fld1 +; AVX-NEXT: fmulp %st, %st(1) ; AVX-NEXT: fsubp %st, %st(1) ; 
AVX-NEXT: retq ; @@ -313,6 +351,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; AVX2-NEXT: fld %st(0) ; AVX2-NEXT: fadd %st(2), %st ; AVX2-NEXT: fsubp %st, %st(1) +; AVX2-NEXT: fld1 +; AVX2-NEXT: fmulp %st, %st(1) ; AVX2-NEXT: fsubp %st, %st(1) ; AVX2-NEXT: retq ; @@ -324,6 +364,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; AVX512F-NEXT: fld %st(0) ; AVX512F-NEXT: fadd %st(2), %st ; AVX512F-NEXT: fsubp %st, %st(1) +; AVX512F-NEXT: fld1 +; AVX512F-NEXT: fmulp %st, %st(1) ; AVX512F-NEXT: fsubp %st, %st(1) ; AVX512F-NEXT: retq ; @@ -335,6 +377,8 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; AVX512BW-NEXT: fld %st(0) ; AVX512BW-NEXT: fadd %st(2), %st ; AVX512BW-NEXT: fsubp %st, %st(1) +; AVX512BW-NEXT: fld1 +; AVX512BW-NEXT: fmulp %st, %st(1) ; AVX512BW-NEXT: fsubp %st, %st(1) ; AVX512BW-NEXT: retq entry: @@ -347,214 +391,6 @@ entry: ret x86_fp80 %result } -define bfloat @complex_canonicalize_fmul_bfloat(bfloat %a, bfloat %b) { -; SSE-LABEL: complex_canonicalize_fmul_bfloat: -; SSE: # %bb.0: # %entry -; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 -; SSE-NEXT: pextrw $0, %xmm0, %eax -; SSE-NEXT: pextrw $0, %xmm1, %ecx -; SSE-NEXT: shll $16, %ecx -; SSE-NEXT: movd %ecx, %xmm1 -; SSE-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill -; SSE-NEXT: shll $16, %eax -; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: subss %xmm1, %xmm0 -; SSE-NEXT: callq __truncsfbf2@PLT -; SSE-NEXT: pextrw $0, %xmm0, %eax -; SSE-NEXT: shll $16, %eax -; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; SSE-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfbf2@PLT -; SSE-NEXT: pextrw $0, %xmm0, %eax -; SSE-NEXT: shll $16, %eax -; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfbf2@PLT -; SSE-NEXT: pextrw $0, %xmm0, %eax 
-; SSE-NEXT: shll $16, %eax -; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfbf2@PLT -; SSE-NEXT: popq %rax -; SSE-NEXT: .cfi_def_cfa_offset 8 -; SSE-NEXT: retq -; -; SSE2-LABEL: complex_canonicalize_fmul_bfloat: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pushq %rax -; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: pextrw $0, %xmm1, %ecx -; SSE2-NEXT: shll $16, %ecx -; SSE2-NEXT: movd %ecx, %xmm1 -; SSE2-NEXT: movd %xmm1, (%rsp) # 4-byte Folded Spill -; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: subss %xmm1, %xmm0 -; SSE2-NEXT: callq __truncsfbf2@PLT -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; SSE2-NEXT: addss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfbf2@PLT -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfbf2@PLT -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: shll $16, %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfbf2@PLT -; SSE2-NEXT: popq %rax -; SSE2-NEXT: .cfi_def_cfa_offset 8 -; SSE2-NEXT: retq -; -; AVX-LABEL: complex_canonicalize_fmul_bfloat: -; AVX: # %bb.0: # %entry -; AVX-NEXT: pushq %rax -; AVX-NEXT: .cfi_def_cfa_offset 16 -; AVX-NEXT: vpextrw $0, %xmm0, %eax -; AVX-NEXT: vpextrw $0, %xmm1, %ecx -; AVX-NEXT: shll $16, %ecx -; AVX-NEXT: vmovd %ecx, %xmm1 -; AVX-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill -; AVX-NEXT: shll $16, %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: callq __truncsfbf2@PLT -; AVX-NEXT: vpextrw $0, %xmm0, %eax -; AVX-NEXT: shll $16, %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: 
vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfbf2@PLT -; AVX-NEXT: vpextrw $0, %xmm0, %eax -; AVX-NEXT: shll $16, %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfbf2@PLT -; AVX-NEXT: vpextrw $0, %xmm0, %eax -; AVX-NEXT: shll $16, %eax -; AVX-NEXT: vmovd %eax, %xmm0 -; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfbf2@PLT -; AVX-NEXT: popq %rax -; AVX-NEXT: .cfi_def_cfa_offset 8 -; AVX-NEXT: retq -; -; AVX2-LABEL: complex_canonicalize_fmul_bfloat: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: pushq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 16 -; AVX2-NEXT: vpextrw $0, %xmm0, %eax -; AVX2-NEXT: vpextrw $0, %xmm1, %ecx -; AVX2-NEXT: shll $16, %ecx -; AVX2-NEXT: vmovd %ecx, %xmm1 -; AVX2-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill -; AVX2-NEXT: shll $16, %eax -; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: callq __truncsfbf2@PLT -; AVX2-NEXT: vpextrw $0, %xmm0, %eax -; AVX2-NEXT: shll $16, %eax -; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX2-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfbf2@PLT -; AVX2-NEXT: vpextrw $0, %xmm0, %eax -; AVX2-NEXT: shll $16, %eax -; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfbf2@PLT -; AVX2-NEXT: vpextrw $0, %xmm0, %eax -; AVX2-NEXT: shll $16, %eax -; AVX2-NEXT: vmovd %eax, %xmm0 -; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfbf2@PLT -; AVX2-NEXT: popq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 8 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: complex_canonicalize_fmul_bfloat: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: 
pushq %rax -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: vpextrw $0, %xmm0, %eax -; AVX512F-NEXT: vpextrw $0, %xmm1, %ecx -; AVX512F-NEXT: shll $16, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm1 -; AVX512F-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: callq __truncsfbf2@PLT -; AVX512F-NEXT: vpextrw $0, %xmm0, %eax -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX512F-NEXT: vaddss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX512F-NEXT: callq __truncsfbf2@PLT -; AVX512F-NEXT: vpextrw $0, %xmm0, %eax -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX512F-NEXT: callq __truncsfbf2@PLT -; AVX512F-NEXT: vpextrw $0, %xmm0, %eax -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: vmovd %eax, %xmm0 -; AVX512F-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX512F-NEXT: callq __truncsfbf2@PLT -; AVX512F-NEXT: popq %rax -; AVX512F-NEXT: .cfi_def_cfa_offset 8 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: complex_canonicalize_fmul_bfloat: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: pushq %rax -; AVX512BW-NEXT: .cfi_def_cfa_offset 16 -; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax -; AVX512BW-NEXT: vpextrw $0, %xmm1, %ecx -; AVX512BW-NEXT: shll $16, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm1 -; AVX512BW-NEXT: vmovd %xmm1, (%rsp) # 4-byte Folded Spill -; AVX512BW-NEXT: shll $16, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm0 -; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: callq __truncsfbf2@PLT -; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax -; AVX512BW-NEXT: shll $16, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm0 -; AVX512BW-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX512BW-NEXT: vaddss (%rsp), %xmm0, 
%xmm0 # 4-byte Folded Reload -; AVX512BW-NEXT: callq __truncsfbf2@PLT -; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax -; AVX512BW-NEXT: shll $16, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm0 -; AVX512BW-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX512BW-NEXT: callq __truncsfbf2@PLT -; AVX512BW-NEXT: vpextrw $0, %xmm0, %eax -; AVX512BW-NEXT: shll $16, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm0 -; AVX512BW-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX512BW-NEXT: callq __truncsfbf2@PLT -; AVX512BW-NEXT: popq %rax -; AVX512BW-NEXT: .cfi_def_cfa_offset 8 -; AVX512BW-NEXT: retq -entry: - - %sub1 = fsub bfloat %a, %b - %add = fadd bfloat %sub1, %b - %sub2 = fsub bfloat %add, %sub1 - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %sub2) - %result = fsub bfloat %canonicalized, %b - ret bfloat %result -} - define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; SSE-LABEL: canonicalize_fp64: ; SSE: # %bb.0: # %start @@ -565,6 +401,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; SSE-NEXT: maxsd %xmm0, %xmm1 ; SSE-NEXT: andnpd %xmm1, %xmm2 ; SSE-NEXT: orpd %xmm3, %xmm2 +; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE-NEXT: movapd %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -577,6 +414,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; SSE2-NEXT: maxsd %xmm0, %xmm1 ; SSE2-NEXT: andnpd %xmm1, %xmm2 ; SSE2-NEXT: orpd %xmm3, %xmm2 +; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE2-NEXT: movapd %xmm2, %xmm0 ; SSE2-NEXT: retq ; @@ -585,6 +423,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 ; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX2-LABEL: canonicalize_fp64: @@ -592,6 +431,7 @@ define double @canonicalize_fp64(double %a, double 
%b) unnamed_addr #0 { ; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 ; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: canonicalize_fp64: @@ -599,7 +439,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 ; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 ; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} -; AVX512F-NEXT: vmovapd %xmm2, %xmm0 +; AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: canonicalize_fp64: @@ -607,7 +447,7 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; AVX512BW-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 ; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 ; AVX512BW-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} -; AVX512BW-NEXT: vmovapd %xmm2, %xmm0 +; AVX512BW-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; AVX512BW-NEXT: retq start: @@ -629,6 +469,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; SSE-NEXT: maxss %xmm0, %xmm1 ; SSE-NEXT: andnps %xmm1, %xmm2 ; SSE-NEXT: orps %xmm3, %xmm2 +; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -641,6 +482,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; SSE2-NEXT: maxss %xmm0, %xmm1 ; SSE2-NEXT: andnps %xmm1, %xmm2 ; SSE2-NEXT: orps %xmm3, %xmm2 +; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: retq ; @@ -649,6 +491,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 ; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX2-LABEL: canonicalize_fp32: @@ -656,6 +499,7 
@@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2 ; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: canonicalize_fp32: @@ -663,7 +507,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2 ; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} -; AVX512F-NEXT: vmovaps %xmm2, %xmm0 +; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: canonicalize_fp32: @@ -671,7 +515,7 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; AVX512BW-NEXT: vmaxss %xmm0, %xmm1, %xmm2 ; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 ; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} -; AVX512BW-NEXT: vmovaps %xmm2, %xmm0 +; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; AVX512BW-NEXT: retq start: @@ -962,39 +806,48 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { ret void } -define void @v_test_canonicalize__bfloat(bfloat addrspace(1)* %out) { -; SSE-LABEL: v_test_canonicalize__bfloat: -; SSE: # %bb.0: # %entry +define void @canonicalize_undef(double addrspace(1)* %out) { +; SSE-LABEL: canonicalize_undef: +; SSE: # %bb.0: +; SSE-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; SSE-NEXT: movq %rax, (%rdi) ; SSE-NEXT: retq ; -; SSE2-LABEL: v_test_canonicalize__bfloat: -; SSE2: # %bb.0: # %entry +; SSE2-LABEL: canonicalize_undef: +; SSE2: # %bb.0: +; SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; SSE2-NEXT: movq %rax, (%rdi) ; SSE2-NEXT: retq ; -; AVX-LABEL: v_test_canonicalize__bfloat: -; AVX: # %bb.0: # %entry +; AVX-LABEL: canonicalize_undef: +; AVX: # %bb.0: +; AVX-NEXT: movabsq $9221120237041090560, 
%rax # imm = 0x7FF8000000000000 +; AVX-NEXT: movq %rax, (%rdi) ; AVX-NEXT: retq ; -; AVX2-LABEL: v_test_canonicalize__bfloat: -; AVX2: # %bb.0: # %entry +; AVX2-LABEL: canonicalize_undef: +; AVX2: # %bb.0: +; AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; AVX2-NEXT: movq %rax, (%rdi) ; AVX2-NEXT: retq ; -; AVX512F-LABEL: v_test_canonicalize__bfloat: -; AVX512F: # %bb.0: # %entry +; AVX512F-LABEL: canonicalize_undef: +; AVX512F: # %bb.0: +; AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; AVX512F-NEXT: movq %rax, (%rdi) ; AVX512F-NEXT: retq ; -; AVX512BW-LABEL: v_test_canonicalize__bfloat: -; AVX512BW: # %bb.0: # %entry +; AVX512BW-LABEL: canonicalize_undef: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; AVX512BW-NEXT: movq %rax, (%rdi) ; AVX512BW-NEXT: retq -entry: - %val = load bfloat, bfloat addrspace(1)* %out - %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val) - store bfloat %canonicalized, bfloat addrspace(1)* %out + %canonicalized = call double @llvm.canonicalize.f64(double undef) + store double %canonicalized, double addrspace(1)* %out ret void } declare double @llvm.canonicalize.f64(double) declare float @llvm.canonicalize.f32(float) -declare bfloat @llvm.canonicalize.bf16(bfloat) declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) declare half @llvm.canonicalize.f16(half) From cbe7d0b91e59f5b05e9fd97a679b8304f10a42de Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Wed, 11 Sep 2024 11:22:36 +0200 Subject: [PATCH 05/10] fix run lines to reuse checks --- llvm/lib/Target/X86/X86ISelLowering.cpp | 21 +- llvm/test/CodeGen/X86/canonicalize-vars.ll | 448 ++++----------------- 2 files changed, 78 insertions(+), 391 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index de53fd4f20c78..91f03ccac779f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ 
b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58150,25 +58150,8 @@ SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) { SDLoc dl(Node); // Canonicalize scalar variable FP Nodes. - SDValue MulNode; - SDValue One; - if (VT == MVT::f32 || VT == MVT::f64) { - One = DAG.getConstantFP(1.0f, dl, VT); - } else if (VT == MVT::f80) { - APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended()); - One = DAG.getConstantFP(Val, dl, VT); - } else if (VT == MVT::f16) { - APFloat Val(APFloat::IEEEhalf(), "1.0"); - One = DAG.getConstantFP(Val, dl, VT); - } else if (VT == MVT::bf16) { - APFloat Val(APFloat::BFloat(), "1.0"); - One = DAG.getConstantFP(Val, dl, VT); - } else { - // Is it better to assert? when we encounter an unknown FP type,Than to - // just replace with the operand! - return Operand; - } - + SDValue One = + DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32)); // TODO: Fix Crash for bf16 when generating strict_fmul as it // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0, // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll index a956449632470..d82749f835736 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -1,10 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 -; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE -; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2 -; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX -; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2 -; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW +; RUN: llc 
-mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW define float @canon_fp32_varargsf32(float %a) { @@ -13,30 +12,10 @@ define float @canon_fp32_varargsf32(float %a) { ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; -; SSE2-LABEL: canon_fp32_varargsf32: -; SSE2: # %bb.0: -; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: retq -; ; AVX-LABEL: canon_fp32_varargsf32: ; AVX: # %bb.0: ; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp32_varargsf32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp32_varargsf32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp32_varargsf32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: retq %canonicalized = call float @llvm.canonicalize.f32(float %a) ret float %canonicalized } @@ -49,40 +28,12 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { ; SSE-NEXT: fmulp %st, %st(1) ; SSE-NEXT: retq ; -; SSE2-LABEL: canon_fp32_varargsf80: -; SSE2: # %bb.0: -; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE2-NEXT: fld1 -; SSE2-NEXT: fmulp %st, %st(1) -; SSE2-NEXT: retq -; ; AVX-LABEL: canon_fp32_varargsf80: ; AVX: # %bb.0: ; AVX-NEXT: fldt {{[0-9]+}}(%rsp) ; AVX-NEXT: fld1 ; AVX-NEXT: fmulp %st, %st(1) ; AVX-NEXT: retq -; -; AVX2-LABEL: canon_fp32_varargsf80: -; AVX2: # %bb.0: -; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX2-NEXT: fld1 -; 
AVX2-NEXT: fmulp %st, %st(1) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canon_fp32_varargsf80: -; AVX512F: # %bb.0: -; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512F-NEXT: fld1 -; AVX512F-NEXT: fmulp %st, %st(1) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canon_fp32_varargsf80: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512BW-NEXT: fld1 -; AVX512BW-NEXT: fmulp %st, %st(1) -; AVX512BW-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a) ret x86_fp80 %canonicalized } @@ -124,76 +75,40 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; -; SSE2-LABEL: complex_canonicalize_fmul_half: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pushq %rax -; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload -; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE2-NEXT: movss (%rsp), %xmm1 # 4-byte Reload -; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: subss %xmm0, %xmm1 -; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: callq __truncsfhf2@PLT -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE2-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfhf2@PLT -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfhf2@PLT -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfhf2@PLT -; SSE2-NEXT: callq 
__extendhfsf2@PLT -; SSE2-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfhf2@PLT -; SSE2-NEXT: popq %rax -; SSE2-NEXT: .cfi_def_cfa_offset 8 -; SSE2-NEXT: retq -; -; AVX-LABEL: complex_canonicalize_fmul_half: -; AVX: # %bb.0: # %entry -; AVX-NEXT: pushq %rax -; AVX-NEXT: .cfi_def_cfa_offset 16 -; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload -; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload -; AVX-NEXT: # xmm1 = mem[0],zero,zero,zero -; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: callq __truncsfhf2@PLT -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfhf2@PLT -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfhf2@PLT -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfhf2@PLT -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfhf2@PLT -; AVX-NEXT: popq %rax -; AVX-NEXT: .cfi_def_cfa_offset 8 -; AVX-NEXT: retq +; AVX1-LABEL: complex_canonicalize_fmul_half: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: pushq %rax +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX1-NEXT: callq __extendhfsf2@PLT +; 
AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX1-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX1-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload +; AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX1-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: popq %rax +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq ; ; AVX2-LABEL: complex_canonicalize_fmul_half: ; AVX2: # %bb.0: # %entry @@ -317,19 +232,6 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; SSE-NEXT: fsubp %st, %st(1) ; SSE-NEXT: retq ; -; SSE2-LABEL: complex_canonicalize_fmul_x86_fp80: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE2-NEXT: fsub %st(1), %st -; SSE2-NEXT: fld %st(0) -; SSE2-NEXT: fadd %st(2), %st -; SSE2-NEXT: fsubp %st, %st(1) -; SSE2-NEXT: fld1 -; SSE2-NEXT: fmulp %st, %st(1) -; SSE2-NEXT: fsubp %st, %st(1) -; SSE2-NEXT: retq -; ; AVX-LABEL: complex_canonicalize_fmul_x86_fp80: ; AVX: # %bb.0: # %entry ; 
AVX-NEXT: fldt {{[0-9]+}}(%rsp) @@ -342,45 +244,6 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; AVX-NEXT: fmulp %st, %st(1) ; AVX-NEXT: fsubp %st, %st(1) ; AVX-NEXT: retq -; -; AVX2-LABEL: complex_canonicalize_fmul_x86_fp80: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX2-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX2-NEXT: fsub %st(1), %st -; AVX2-NEXT: fld %st(0) -; AVX2-NEXT: fadd %st(2), %st -; AVX2-NEXT: fsubp %st, %st(1) -; AVX2-NEXT: fld1 -; AVX2-NEXT: fmulp %st, %st(1) -; AVX2-NEXT: fsubp %st, %st(1) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: complex_canonicalize_fmul_x86_fp80: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512F-NEXT: fsub %st(1), %st -; AVX512F-NEXT: fld %st(0) -; AVX512F-NEXT: fadd %st(2), %st -; AVX512F-NEXT: fsubp %st, %st(1) -; AVX512F-NEXT: fld1 -; AVX512F-NEXT: fmulp %st, %st(1) -; AVX512F-NEXT: fsubp %st, %st(1) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: complex_canonicalize_fmul_x86_fp80: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512BW-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX512BW-NEXT: fsub %st(1), %st -; AVX512BW-NEXT: fld %st(0) -; AVX512BW-NEXT: fadd %st(2), %st -; AVX512BW-NEXT: fsubp %st, %st(1) -; AVX512BW-NEXT: fld1 -; AVX512BW-NEXT: fmulp %st, %st(1) -; AVX512BW-NEXT: fsubp %st, %st(1) -; AVX512BW-NEXT: retq entry: %mul1 = fsub x86_fp80 %a, %b @@ -405,26 +268,13 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; SSE-NEXT: movapd %xmm2, %xmm0 ; SSE-NEXT: retq ; -; SSE2-LABEL: canonicalize_fp64: -; SSE2: # %bb.0: # %start -; SSE2-NEXT: movapd %xmm0, %xmm2 -; SSE2-NEXT: cmpunordsd %xmm0, %xmm2 -; SSE2-NEXT: movapd %xmm2, %xmm3 -; SSE2-NEXT: andpd %xmm1, %xmm3 -; SSE2-NEXT: maxsd %xmm0, %xmm1 -; SSE2-NEXT: andnpd %xmm1, %xmm2 -; SSE2-NEXT: orpd %xmm3, %xmm2 -; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: movapd %xmm2, %xmm0 -; 
SSE2-NEXT: retq -; -; AVX-LABEL: canonicalize_fp64: -; AVX: # %bb.0: # %start -; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 -; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: canonicalize_fp64: +; AVX1: # %bb.0: # %start +; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq ; ; AVX2-LABEL: canonicalize_fp64: ; AVX2: # %bb.0: # %start @@ -473,26 +323,13 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; SSE2-LABEL: canonicalize_fp32: -; SSE2: # %bb.0: # %start -; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: cmpunordss %xmm0, %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm3 -; SSE2-NEXT: andps %xmm1, %xmm3 -; SSE2-NEXT: maxss %xmm0, %xmm1 -; SSE2-NEXT: andnps %xmm1, %xmm2 -; SSE2-NEXT: orps %xmm3, %xmm2 -; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: movaps %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; AVX-LABEL: canonicalize_fp32: -; AVX: # %bb.0: # %start -; AVX-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: canonicalize_fp32: +; AVX1: # %bb.0: # %start +; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 +; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq ; ; AVX2-LABEL: canonicalize_fp32: ; AVX2: # %bb.0: # %start @@ -535,40 +372,12 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { ; SSE-NEXT: movss %xmm0, (%rdi) ; SSE-NEXT: retq ; -; SSE2-LABEL: 
v_test_canonicalize_var_f32: -; SSE2: # %bb.0: -; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; SSE2-NEXT: mulss (%rdi), %xmm0 -; SSE2-NEXT: movss %xmm0, (%rdi) -; SSE2-NEXT: retq -; ; AVX-LABEL: v_test_canonicalize_var_f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] ; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0 ; AVX-NEXT: vmovss %xmm0, (%rdi) ; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_var_f32: -; AVX2: # %bb.0: -; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX2-NEXT: vmulss (%rdi), %xmm0, %xmm0 -; AVX2-NEXT: vmovss %xmm0, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_var_f32: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX512F-NEXT: vmulss (%rdi), %xmm0, %xmm0 -; AVX512F-NEXT: vmovss %xmm0, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_var_f32: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX512BW-NEXT: vmulss (%rdi), %xmm0, %xmm0 -; AVX512BW-NEXT: vmovss %xmm0, (%rdi) -; AVX512BW-NEXT: retq %val = load float, float addrspace(1)* %out %canonicalized = call float @llvm.canonicalize.f32(float %val) store float %canonicalized, float addrspace(1)* %out @@ -584,14 +393,6 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { ; SSE-NEXT: fstpt (%rdi) ; SSE-NEXT: retq ; -; SSE2-LABEL: v_test_canonicalize_x86_fp80: -; SSE2: # %bb.0: -; SSE2-NEXT: fldt (%rdi) -; SSE2-NEXT: fld1 -; SSE2-NEXT: fmulp %st, %st(1) -; SSE2-NEXT: fstpt (%rdi) -; SSE2-NEXT: retq -; ; AVX-LABEL: v_test_canonicalize_x86_fp80: ; AVX: # %bb.0: ; AVX-NEXT: fldt (%rdi) @@ -599,30 +400,6 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { ; AVX-NEXT: fmulp %st, %st(1) ; AVX-NEXT: fstpt (%rdi) ; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_x86_fp80: -; AVX2: # %bb.0: -; AVX2-NEXT: fldt (%rdi) -; AVX2-NEXT: fld1 -; 
AVX2-NEXT: fmulp %st, %st(1) -; AVX2-NEXT: fstpt (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_x86_fp80: -; AVX512F: # %bb.0: -; AVX512F-NEXT: fldt (%rdi) -; AVX512F-NEXT: fld1 -; AVX512F-NEXT: fmulp %st, %st(1) -; AVX512F-NEXT: fstpt (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_x86_fp80: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: fldt (%rdi) -; AVX512BW-NEXT: fld1 -; AVX512BW-NEXT: fmulp %st, %st(1) -; AVX512BW-NEXT: fstpt (%rdi) -; AVX512BW-NEXT: retq %val = load x86_fp80, x86_fp80 addrspace(1)* %out %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out @@ -653,50 +430,27 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) { ; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; -; SSE2-LABEL: v_test_canonicalize__half: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: subq $16, %rsp -; SSE2-NEXT: .cfi_def_cfa_offset 32 -; SSE2-NEXT: .cfi_offset %rbx, -16 -; SSE2-NEXT: movq %rdi, %rbx -; SSE2-NEXT: pinsrw $0, (%rdi), %xmm0 -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; SSE2-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: callq __extendhfsf2@PLT -; SSE2-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE2-NEXT: callq __truncsfhf2@PLT -; SSE2-NEXT: pextrw $0, %xmm0, %eax -; SSE2-NEXT: movw %ax, (%rbx) -; SSE2-NEXT: addq $16, %rsp -; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: popq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 8 -; SSE2-NEXT: retq -; -; AVX-LABEL: v_test_canonicalize__half: -; AVX: # %bb.0: # %entry -; AVX-NEXT: pushq %rbx -; AVX-NEXT: .cfi_def_cfa_offset 16 -; AVX-NEXT: subq $16, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 32 -; AVX-NEXT: .cfi_offset %rbx, -16 -; AVX-NEXT: movq %rdi, %rbx -; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 -; AVX-NEXT: callq 
__extendhfsf2@PLT -; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: callq __extendhfsf2@PLT -; AVX-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX-NEXT: callq __truncsfhf2@PLT -; AVX-NEXT: vpextrw $0, %xmm0, (%rbx) -; AVX-NEXT: addq $16, %rsp -; AVX-NEXT: .cfi_def_cfa_offset 16 -; AVX-NEXT: popq %rbx -; AVX-NEXT: .cfi_def_cfa_offset 8 -; AVX-NEXT: retq +; AVX1-LABEL: v_test_canonicalize__half: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: pushq %rbx +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: subq $16, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 32 +; AVX1-NEXT: .cfi_offset %rbx, -16 +; AVX1-NEXT: movq %rdi, %rbx +; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX1-NEXT: addq $16, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: popq %rbx +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq ; ; AVX2-LABEL: v_test_canonicalize__half: ; AVX2: # %bb.0: # %entry @@ -766,40 +520,12 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { ; SSE-NEXT: movsd %xmm0, (%rdi) ; SSE-NEXT: retq ; -; SSE2-LABEL: v_test_canonicalize_var_f64: -; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; SSE2-NEXT: mulsd (%rdi), %xmm0 -; SSE2-NEXT: movsd %xmm0, (%rdi) -; SSE2-NEXT: retq -; ; AVX-LABEL: v_test_canonicalize_var_f64: ; AVX: # %bb.0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] ; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 ; AVX-NEXT: vmovsd %xmm0, (%rdi) ; AVX-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize_var_f64: -; 
AVX2: # %bb.0: -; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; AVX2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 -; AVX2-NEXT: vmovsd %xmm0, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize_var_f64: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; AVX512F-NEXT: vmulsd (%rdi), %xmm0, %xmm0 -; AVX512F-NEXT: vmovsd %xmm0, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize_var_f64: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; AVX512BW-NEXT: vmulsd (%rdi), %xmm0, %xmm0 -; AVX512BW-NEXT: vmovsd %xmm0, (%rdi) -; AVX512BW-NEXT: retq %val = load double, double addrspace(1)* %out %canonicalized = call double @llvm.canonicalize.f64(double %val) store double %canonicalized, double addrspace(1)* %out @@ -813,35 +539,11 @@ define void @canonicalize_undef(double addrspace(1)* %out) { ; SSE-NEXT: movq %rax, (%rdi) ; SSE-NEXT: retq ; -; SSE2-LABEL: canonicalize_undef: -; SSE2: # %bb.0: -; SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; SSE2-NEXT: movq %rax, (%rdi) -; SSE2-NEXT: retq -; ; AVX-LABEL: canonicalize_undef: ; AVX: # %bb.0: ; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 ; AVX-NEXT: movq %rax, (%rdi) ; AVX-NEXT: retq -; -; AVX2-LABEL: canonicalize_undef: -; AVX2: # %bb.0: -; AVX2-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; AVX2-NEXT: movq %rax, (%rdi) -; AVX2-NEXT: retq -; -; AVX512F-LABEL: canonicalize_undef: -; AVX512F: # %bb.0: -; AVX512F-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; AVX512F-NEXT: movq %rax, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canonicalize_undef: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; AVX512BW-NEXT: movq %rax, (%rdi) -; AVX512BW-NEXT: retq %canonicalized = call double @llvm.canonicalize.f64(double undef) store double %canonicalized, double addrspace(1)* %out ret void 
@@ -851,3 +553,5 @@ declare double @llvm.canonicalize.f64(double) declare float @llvm.canonicalize.f32(float) declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) declare half @llvm.canonicalize.f16(half) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; SSE2: {{.*}} From 9e37e86ff6c5d1bdcd003711e48783e0c1a90235 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Wed, 11 Sep 2024 19:36:01 +0200 Subject: [PATCH 06/10] fix lit failure for sse2 mode --- .../CodeGen/X86/canonicalize-vars-f16-type.ll | 299 ++++++++++ llvm/test/CodeGen/X86/canonicalize-vars.ll | 551 ++++++------------ 2 files changed, 491 insertions(+), 359 deletions(-) create mode 100644 llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll new file mode 100644 index 0000000000000..d42ed4fc3831f --- /dev/null +++ b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 + +; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2 +; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW + +define void @v_test_canonicalize__half(half addrspace(1)* %out) { +; SSE-LABEL: v_test_canonicalize__half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rbx +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: subq $16, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 32 +; SSE-NEXT: .cfi_offset %rbx, -16 +; SSE-NEXT: movq %rdi, %rbx +; SSE-NEXT: pinsrw $0, (%rdi), %xmm0 +; SSE-NEXT: callq 
__extendhfsf2@PLT +; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: movw %ax, (%rbx) +; SSE-NEXT: addq $16, %rsp +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: popq %rbx +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX1-LABEL: v_test_canonicalize__half: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: pushq %rbx +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: subq $16, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 32 +; AVX1-NEXT: .cfi_offset %rbx, -16 +; AVX1-NEXT: movq %rdi, %rbx +; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX1-NEXT: addq $16, %rsp +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: popq %rbx +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize__half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: subq $16, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 32 +; AVX2-NEXT: .cfi_offset %rbx, -16 +; AVX2-NEXT: movq %rdi, %rbx +; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT 
+; AVX2-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX2-NEXT: addq $16, %rsp +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize__half: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: movzwl (%rdi), %eax +; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512F-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: movw %ax, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize__half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: movzwl (%rdi), %eax +; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx +; AVX512BW-NEXT: vmovd %ecx, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512BW-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: movw %ax, (%rdi) +; AVX512BW-NEXT: retq +entry: + %val = load half, half addrspace(1)* %out + %canonicalized = call half @llvm.canonicalize.f16(half %val) + store half %canonicalized, half addrspace(1)* %out + ret void +} + + +define half @complex_canonicalize_fmul_half(half %a, half %b) { +; SSE-LABEL: complex_canonicalize_fmul_half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: movss 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movss (%rsp), %xmm1 # 4-byte Reload +; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; SSE-NEXT: subss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX1-LABEL: complex_canonicalize_fmul_half: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: pushq %rax +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX1-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX1-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload +; AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero +; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; 
AVX1-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: popq %rax +; AVX1-NEXT: .cfi_def_cfa_offset 8 +; AVX1-NEXT: retq +; +; AVX2-LABEL: complex_canonicalize_fmul_half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: pushq %rax +; AVX2-NEXT: .cfi_def_cfa_offset 16 +; AVX2-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVX2-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload +; AVX2-NEXT: # xmm1 = mem[0],zero,zero,zero +; AVX2-NEXT: vsubss %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; 
AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: popq %rax +; AVX2-NEXT: .cfi_def_cfa_offset 8 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: complex_canonicalize_fmul_half: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpextrw $0, %xmm1, %eax +; AVX512F-NEXT: vpextrw $0, %xmm0, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm2 +; AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512F-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512F-NEXT: vmovd %eax, %xmm2 +; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512F-NEXT: vmulss %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: complex_canonicalize_fmul_half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax +; AVX512BW-NEXT: vpextrw $0, %xmm0, %ecx +; AVX512BW-NEXT: vmovd %ecx, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd 
%eax, %xmm1 +; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm2 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512BW-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512BW-NEXT: vmovd %eax, %xmm2 +; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512BW-NEXT: vmulss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVX512BW-NEXT: retq +entry: + + %mul1 = fsub half %a, %b + %add = fadd half %mul1, %b + %mul2 = fsub half %add, %mul1 + %canonicalized = call half @llvm.canonicalize.f16(half %mul2) + %result = fsub half %canonicalized, %b + ret half %result +} + +declare half @llvm.canonicalize.f16(half) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX: {{.*}} +; SSE2: {{.*}} diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll index d82749f835736..fe399ce127498 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -1,16 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 +; RUN: llc < %s -mtriple=i686-- --mattr=-sse2 | FileCheck %s -check-prefixes=SSE,SSE1 ; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2 ; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1 ; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2 ; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F ; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW - define float @canon_fp32_varargsf32(float %a) { -; SSE-LABEL: canon_fp32_varargsf32: -; SSE: # %bb.0: -; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: retq +; SSE1-LABEL: canon_fp32_varargsf32: +; SSE1: # %bb.0: +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmuls {{[0-9]+}}(%esp) +; SSE1-NEXT: retl +; +; SSE2-LABEL: canon_fp32_varargsf32: +; SSE2: # %bb.0: +; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq ; ; AVX-LABEL: canon_fp32_varargsf32: ; AVX: # %bb.0: @@ -21,12 +27,19 @@ define float @canon_fp32_varargsf32(float %a) { } define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { -; SSE-LABEL: canon_fp32_varargsf80: -; SSE: # %bb.0: -; SSE-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE-NEXT: fld1 -; SSE-NEXT: fmulp %st, %st(1) -; SSE-NEXT: retq +; SSE1-LABEL: canon_fp32_varargsf80: +; SSE1: # %bb.0: +; SSE1-NEXT: fldt {{[0-9]+}}(%esp) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmulp %st, %st(1) +; SSE1-NEXT: retl +; +; SSE2-LABEL: canon_fp32_varargsf80: +; SSE2: # %bb.0: +; 
SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmulp %st, %st(1) +; SSE2-NEXT: retq ; ; AVX-LABEL: canon_fp32_varargsf80: ; AVX: # %bb.0: @@ -38,199 +51,32 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { ret x86_fp80 %canonicalized } -define half @complex_canonicalize_fmul_half(half %a, half %b) { -; SSE-LABEL: complex_canonicalize_fmul_half: -; SSE: # %bb.0: # %entry -; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 -; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload -; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; SSE-NEXT: movss (%rsp), %xmm1 # 4-byte Reload -; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: subss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: subss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill -; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: mulss (%rsp), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: popq %rax -; SSE-NEXT: .cfi_def_cfa_offset 8 -; SSE-NEXT: retq -; -; AVX1-LABEL: complex_canonicalize_fmul_half: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: pushq %rax -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: 
vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX1-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload -; AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX1-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload -; AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: callq __truncsfhf2@PLT -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX1-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX1-NEXT: callq __truncsfhf2@PLT -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX1-NEXT: callq __truncsfhf2@PLT -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX1-NEXT: callq __truncsfhf2@PLT -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX1-NEXT: callq __truncsfhf2@PLT -; AVX1-NEXT: popq %rax -; AVX1-NEXT: .cfi_def_cfa_offset 8 -; AVX1-NEXT: retq -; -; AVX2-LABEL: complex_canonicalize_fmul_half: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: pushq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 16 -; AVX2-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX2-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload -; AVX2-NEXT: # xmm0 = mem[0],zero,zero,zero -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; AVX2-NEXT: vmovss (%rsp), %xmm1 # 4-byte Reload -; AVX2-NEXT: 
# xmm1 = mem[0],zero,zero,zero -; AVX2-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: callq __truncsfhf2@PLT -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX2-NEXT: vaddss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfhf2@PLT -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vsubss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfhf2@PLT -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill -; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmulss (%rsp), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfhf2@PLT -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfhf2@PLT -; AVX2-NEXT: popq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 8 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: complex_canonicalize_fmul_half: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: vpextrw $0, %xmm1, %eax -; AVX512F-NEXT: vpextrw $0, %xmm0, %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm0 -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %eax, %xmm1 -; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm2 -; AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2 -; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512F-NEXT: vsubss %xmm0, %xmm2, %xmm0 -; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX512F-NEXT: vmovd %eax, %xmm2 -; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512F-NEXT: vmulss %xmm0, %xmm2, %xmm0 -; AVX512F-NEXT: vxorps %xmm2, %xmm2, 
%xmm2 -; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512F-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm0, %eax -; AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: complex_canonicalize_fmul_half: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax -; AVX512BW-NEXT: vpextrw $0, %xmm0, %ecx -; AVX512BW-NEXT: vmovd %ecx, %xmm0 -; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %eax, %xmm1 -; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512BW-NEXT: vaddss %xmm1, %xmm0, %xmm2 -; AVX512BW-NEXT: vcvtps2ph $4, %xmm2, %xmm2 -; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512BW-NEXT: vsubss %xmm0, %xmm2, %xmm0 -; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512BW-NEXT: vmulss %xmm0, %xmm2, %xmm0 -; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] -; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512BW-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %xmm0, %eax -; AVX512BW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 -; AVX512BW-NEXT: retq -entry: - - %mul1 = fsub half %a, %b - %add = fadd half %mul1, %b - %mul2 = fsub half %add, %mul1 - %canonicalized = call half @llvm.canonicalize.f16(half %mul2) - %result = fsub half %canonicalized, %b - ret half %result -} - define x86_fp80 
@complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { -; SSE-LABEL: complex_canonicalize_fmul_x86_fp80: -; SSE: # %bb.0: # %entry -; SSE-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE-NEXT: fldt {{[0-9]+}}(%rsp) -; SSE-NEXT: fsub %st(1), %st -; SSE-NEXT: fld %st(0) -; SSE-NEXT: fadd %st(2), %st -; SSE-NEXT: fsubp %st, %st(1) -; SSE-NEXT: fld1 -; SSE-NEXT: fmulp %st, %st(1) -; SSE-NEXT: fsubp %st, %st(1) -; SSE-NEXT: retq +; SSE1-LABEL: complex_canonicalize_fmul_x86_fp80: +; SSE1: # %bb.0: # %entry +; SSE1-NEXT: fldt {{[0-9]+}}(%esp) +; SSE1-NEXT: fldt {{[0-9]+}}(%esp) +; SSE1-NEXT: fsub %st(1), %st +; SSE1-NEXT: fld %st(0) +; SSE1-NEXT: fadd %st(2), %st +; SSE1-NEXT: fsubp %st, %st(1) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmulp %st, %st(1) +; SSE1-NEXT: fsubp %st, %st(1) +; SSE1-NEXT: retl +; +; SSE2-LABEL: complex_canonicalize_fmul_x86_fp80: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fsub %st(1), %st +; SSE2-NEXT: fld %st(0) +; SSE2-NEXT: fadd %st(2), %st +; SSE2-NEXT: fsubp %st, %st(1) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmulp %st, %st(1) +; SSE2-NEXT: fsubp %st, %st(1) +; SSE2-NEXT: retq ; ; AVX-LABEL: complex_canonicalize_fmul_x86_fp80: ; AVX: # %bb.0: # %entry @@ -255,18 +101,49 @@ entry: } define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { -; SSE-LABEL: canonicalize_fp64: -; SSE: # %bb.0: # %start -; SSE-NEXT: movapd %xmm0, %xmm2 -; SSE-NEXT: cmpunordsd %xmm0, %xmm2 -; SSE-NEXT: movapd %xmm2, %xmm3 -; SSE-NEXT: andpd %xmm1, %xmm3 -; SSE-NEXT: maxsd %xmm0, %xmm1 -; SSE-NEXT: andnpd %xmm1, %xmm2 -; SSE-NEXT: orpd %xmm3, %xmm2 -; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE-NEXT: movapd %xmm2, %xmm0 -; SSE-NEXT: retq +; SSE1-LABEL: canonicalize_fp64: +; SSE1: # %bb.0: # %start +; SSE1-NEXT: fldl {{[0-9]+}}(%esp) +; SSE1-NEXT: fldl {{[0-9]+}}(%esp) +; SSE1-NEXT: fucom %st(1) +; SSE1-NEXT: fnstsw %ax +; SSE1-NEXT: # kill: def $ah killed $ah killed $ax +; SSE1-NEXT: 
sahf +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: fucom %st(0) +; SSE1-NEXT: fnstsw %ax +; SSE1-NEXT: fld %st(1) +; SSE1-NEXT: ja .LBB3_2 +; SSE1-NEXT: # %bb.1: # %start +; SSE1-NEXT: fstp %st(0) +; SSE1-NEXT: fldz +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: .LBB3_2: # %start +; SSE1-NEXT: fstp %st(1) +; SSE1-NEXT: # kill: def $ah killed $ah killed $ax +; SSE1-NEXT: sahf +; SSE1-NEXT: jp .LBB3_4 +; SSE1-NEXT: # %bb.3: # %start +; SSE1-NEXT: fstp %st(1) +; SSE1-NEXT: fldz +; SSE1-NEXT: .LBB3_4: # %start +; SSE1-NEXT: fstp %st(0) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmulp %st, %st(1) +; SSE1-NEXT: retl +; +; SSE2-LABEL: canonicalize_fp64: +; SSE2: # %bb.0: # %start +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm3 +; SSE2-NEXT: andpd %xmm1, %xmm3 +; SSE2-NEXT: maxsd %xmm0, %xmm1 +; SSE2-NEXT: andnpd %xmm1, %xmm2 +; SSE2-NEXT: orpd %xmm3, %xmm2 +; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: retq ; ; AVX1-LABEL: canonicalize_fp64: ; AVX1: # %bb.0: # %start @@ -310,18 +187,49 @@ start: } define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { -; SSE-LABEL: canonicalize_fp32: -; SSE: # %bb.0: # %start -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: cmpunordss %xmm0, %xmm2 -; SSE-NEXT: movaps %xmm2, %xmm3 -; SSE-NEXT: andps %xmm1, %xmm3 -; SSE-NEXT: maxss %xmm0, %xmm1 -; SSE-NEXT: andnps %xmm1, %xmm2 -; SSE-NEXT: orps %xmm3, %xmm2 -; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE-NEXT: movaps %xmm2, %xmm0 -; SSE-NEXT: retq +; SSE1-LABEL: canonicalize_fp32: +; SSE1: # %bb.0: # %start +; SSE1-NEXT: flds {{[0-9]+}}(%esp) +; SSE1-NEXT: flds {{[0-9]+}}(%esp) +; SSE1-NEXT: fucom %st(1) +; SSE1-NEXT: fnstsw %ax +; SSE1-NEXT: # kill: def $ah killed $ah killed $ax +; SSE1-NEXT: sahf +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: fucom %st(0) +; SSE1-NEXT: fnstsw %ax +; SSE1-NEXT: fld %st(1) +; SSE1-NEXT: ja .LBB4_2 +; SSE1-NEXT: # %bb.1: # %start +; SSE1-NEXT: 
fstp %st(0) +; SSE1-NEXT: fldz +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: .LBB4_2: # %start +; SSE1-NEXT: fstp %st(1) +; SSE1-NEXT: # kill: def $ah killed $ah killed $ax +; SSE1-NEXT: sahf +; SSE1-NEXT: jp .LBB4_4 +; SSE1-NEXT: # %bb.3: # %start +; SSE1-NEXT: fstp %st(1) +; SSE1-NEXT: fldz +; SSE1-NEXT: .LBB4_4: # %start +; SSE1-NEXT: fstp %st(0) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmulp %st, %st(1) +; SSE1-NEXT: retl +; +; SSE2-LABEL: canonicalize_fp32: +; SSE2: # %bb.0: # %start +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: cmpunordss %xmm0, %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm3 +; SSE2-NEXT: andps %xmm1, %xmm3 +; SSE2-NEXT: maxss %xmm0, %xmm1 +; SSE2-NEXT: andnps %xmm1, %xmm2 +; SSE2-NEXT: orps %xmm3, %xmm2 +; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: retq ; ; AVX1-LABEL: canonicalize_fp32: ; AVX1: # %bb.0: # %start @@ -365,12 +273,20 @@ start: } define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { -; SSE-LABEL: v_test_canonicalize_var_f32: -; SSE: # %bb.0: -; SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; SSE-NEXT: mulss (%rdi), %xmm0 -; SSE-NEXT: movss %xmm0, (%rdi) -; SSE-NEXT: retq +; SSE1-LABEL: v_test_canonicalize_var_f32: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmuls (%eax) +; SSE1-NEXT: fstps (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: v_test_canonicalize_var_f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; SSE2-NEXT: mulss (%rdi), %xmm0 +; SSE2-NEXT: movss %xmm0, (%rdi) +; SSE2-NEXT: retq ; ; AVX-LABEL: v_test_canonicalize_var_f32: ; AVX: # %bb.0: @@ -385,13 +301,22 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { } define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { -; SSE-LABEL: v_test_canonicalize_x86_fp80: -; SSE: # %bb.0: -; SSE-NEXT: fldt (%rdi) -; SSE-NEXT: fld1 -; SSE-NEXT: fmulp %st, %st(1) -; SSE-NEXT: 
fstpt (%rdi) -; SSE-NEXT: retq +; SSE1-LABEL: v_test_canonicalize_x86_fp80: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fldt (%eax) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmulp %st, %st(1) +; SSE1-NEXT: fstpt (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: v_test_canonicalize_x86_fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: fldt (%rdi) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmulp %st, %st(1) +; SSE2-NEXT: fstpt (%rdi) +; SSE2-NEXT: retq ; ; AVX-LABEL: v_test_canonicalize_x86_fp80: ; AVX: # %bb.0: @@ -406,119 +331,21 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { ret void } -define void @v_test_canonicalize__half(half addrspace(1)* %out) { -; SSE-LABEL: v_test_canonicalize__half: -; SSE: # %bb.0: # %entry -; SSE-NEXT: pushq %rbx -; SSE-NEXT: .cfi_def_cfa_offset 16 -; SSE-NEXT: subq $16, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 32 -; SSE-NEXT: .cfi_offset %rbx, -16 -; SSE-NEXT: movq %rdi, %rbx -; SSE-NEXT: pinsrw $0, (%rdi), %xmm0 -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: callq __extendhfsf2@PLT -; SSE-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload -; SSE-NEXT: callq __truncsfhf2@PLT -; SSE-NEXT: pextrw $0, %xmm0, %eax -; SSE-NEXT: movw %ax, (%rbx) -; SSE-NEXT: addq $16, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 16 -; SSE-NEXT: popq %rbx -; SSE-NEXT: .cfi_def_cfa_offset 8 -; SSE-NEXT: retq -; -; AVX1-LABEL: v_test_canonicalize__half: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: subq $16, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 32 -; AVX1-NEXT: .cfi_offset %rbx, -16 -; AVX1-NEXT: movq %rdi, %rbx -; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 -; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 
-; AVX1-NEXT: callq __extendhfsf2@PLT -; AVX1-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX1-NEXT: callq __truncsfhf2@PLT -; AVX1-NEXT: vpextrw $0, %xmm0, (%rbx) -; AVX1-NEXT: addq $16, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: popq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 8 -; AVX1-NEXT: retq -; -; AVX2-LABEL: v_test_canonicalize__half: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 16 -; AVX2-NEXT: subq $16, %rsp -; AVX2-NEXT: .cfi_def_cfa_offset 32 -; AVX2-NEXT: .cfi_offset %rbx, -16 -; AVX2-NEXT: movq %rdi, %rbx -; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill -; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX2-NEXT: callq __extendhfsf2@PLT -; AVX2-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload -; AVX2-NEXT: callq __truncsfhf2@PLT -; AVX2-NEXT: vpextrw $0, %xmm0, (%rbx) -; AVX2-NEXT: addq $16, %rsp -; AVX2-NEXT: .cfi_def_cfa_offset 16 -; AVX2-NEXT: popq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 8 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: v_test_canonicalize__half: -; AVX512F: # %bb.0: # %entry -; AVX512F-NEXT: movzwl (%rdi), %eax -; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx -; AVX512F-NEXT: vmovd %ecx, %xmm0 -; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %eax, %xmm1 -; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512F-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512F-NEXT: vmovd %xmm0, %eax -; AVX512F-NEXT: movw %ax, (%rdi) -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: v_test_canonicalize__half: -; AVX512BW: # %bb.0: # %entry -; AVX512BW-NEXT: movzwl (%rdi), %eax -; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx -; AVX512BW-NEXT: vmovd 
%ecx, %xmm0 -; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %eax, %xmm1 -; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512BW-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] -; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovd %xmm0, %eax -; AVX512BW-NEXT: movw %ax, (%rdi) -; AVX512BW-NEXT: retq -entry: - %val = load half, half addrspace(1)* %out - %canonicalized = call half @llvm.canonicalize.f16(half %val) - store half %canonicalized, half addrspace(1)* %out - ret void -} - define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { -; SSE-LABEL: v_test_canonicalize_var_f64: -; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; SSE-NEXT: mulsd (%rdi), %xmm0 -; SSE-NEXT: movsd %xmm0, (%rdi) -; SSE-NEXT: retq +; SSE1-LABEL: v_test_canonicalize_var_f64: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmull (%eax) +; SSE1-NEXT: fstpl (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: v_test_canonicalize_var_f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; SSE2-NEXT: mulsd (%rdi), %xmm0 +; SSE2-NEXT: movsd %xmm0, (%rdi) +; SSE2-NEXT: retq ; ; AVX-LABEL: v_test_canonicalize_var_f64: ; AVX: # %bb.0: @@ -533,11 +360,18 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { } define void @canonicalize_undef(double addrspace(1)* %out) { -; SSE-LABEL: canonicalize_undef: -; SSE: # %bb.0: -; SSE-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; SSE-NEXT: movq %rax, (%rdi) -; SSE-NEXT: retq +; SSE1-LABEL: canonicalize_undef: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000 +; SSE1-NEXT: movl $0, (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: canonicalize_undef: +; SSE2: # %bb.0: +; SSE2-NEXT: movabsq $9221120237041090560, %rax # imm = 
0x7FF8000000000000 +; SSE2-NEXT: movq %rax, (%rdi) +; SSE2-NEXT: retq ; ; AVX-LABEL: canonicalize_undef: ; AVX: # %bb.0: @@ -552,6 +386,5 @@ define void @canonicalize_undef(double addrspace(1)* %out) { declare double @llvm.canonicalize.f64(double) declare float @llvm.canonicalize.f32(float) declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) -declare half @llvm.canonicalize.f16(half) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; SSE2: {{.*}} +; SSE: {{.*}} From b9d2cf87f2e08c2c5cd473243cf2690b25c9f4f9 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Thu, 12 Sep 2024 11:04:15 +0200 Subject: [PATCH 07/10] minor refactors --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 +-- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +- .../CodeGen/X86/canonicalize-vars-f16-type.ll | 40 +---- llvm/test/CodeGen/X86/canonicalize-vars.ll | 142 +++++++----------- 4 files changed, 82 insertions(+), 133 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 54bcc0aeb06bb..84550b1d3ce85 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -508,6 +508,7 @@ namespace { SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); SDValue visitFPOW(SDNode *N); + SDValue visitFCANONICALIZE(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); SDValue visitFP_TO_SINT(SDNode *N); @@ -1980,16 +1981,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FREEZE: return visitFREEZE(N); case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N); case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N); - case ISD::FCANONICALIZE:{ - SDValue Operand = N->getOperand(0); - EVT VT = Operand.getValueType(); - SDLoc dl(N); - if(Operand.isUndef()){ - APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); - return DAG.getConstantFP(CanonicalQNaN, dl, VT); - } - break; - } + case 
ISD::FCANONICALIZE: return visitFCANONICALIZE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -2100,6 +2092,19 @@ static SDValue getInputChainForNode(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCANONICALIZE(SDNode *N) { + SDValue Operand = N->getOperand(0); + EVT VT = Operand.getValueType(); + SDLoc dl(N); + + // Canonicalize undef to quiet NaN. + if (Operand.isUndef()) { + APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics()); + return DAG.getConstantFP(CanonicalQNaN, dl, VT); + } + return SDValue(); +} + SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // If N has two operands, where one has an input chain equal to the other, // the 'other' chain is redundant. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 78bbf6473f788..01275df21ebde 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58160,10 +58160,10 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, return SDValue(); } -SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) { - SDValue Operand = Node->getOperand(0); +static SDValue combineCanonicalize(SDNode *N, SelectionDAG &DAG) { + SDValue Operand = N->getOperand(0); EVT VT = Operand.getValueType(); - SDLoc dl(Node); + SDLoc dl(N); // Canonicalize scalar variable FP Nodes. 
SDValue One = @@ -58218,7 +58218,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); - case ISD::FCANONICALIZE: return combineCanonicalize(N,DAG); + case ISD::FCANONICALIZE: return combineCanonicalize(N, DAG); case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget); case ISD::AVGCEILS: case ISD::AVGCEILU: diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll index d42ed4fc3831f..3a1947a62cb6d 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll @@ -1,19 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 +; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE +; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX1 +; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX2 +; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX512F +; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX512BW -; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2 -; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2 -; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW - -define void @v_test_canonicalize__half(half addrspace(1)* %out) { +define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind { ; SSE-LABEL: v_test_canonicalize__half: ; SSE: # 
%bb.0: # %entry ; SSE-NEXT: pushq %rbx -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: subq $16, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 32 -; SSE-NEXT: .cfi_offset %rbx, -16 ; SSE-NEXT: movq %rdi, %rbx ; SSE-NEXT: pinsrw $0, (%rdi), %xmm0 ; SSE-NEXT: callq __extendhfsf2@PLT @@ -25,18 +21,13 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) { ; SSE-NEXT: pextrw $0, %xmm0, %eax ; SSE-NEXT: movw %ax, (%rbx) ; SSE-NEXT: addq $16, %rsp -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: popq %rbx -; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; ; AVX1-LABEL: v_test_canonicalize__half: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: subq $16, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 32 -; AVX1-NEXT: .cfi_offset %rbx, -16 ; AVX1-NEXT: movq %rdi, %rbx ; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; AVX1-NEXT: callq __extendhfsf2@PLT @@ -47,18 +38,13 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) { ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: vpextrw $0, %xmm0, (%rbx) ; AVX1-NEXT: addq $16, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: popq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX2-LABEL: v_test_canonicalize__half: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: subq $16, %rsp -; AVX2-NEXT: .cfi_def_cfa_offset 32 -; AVX2-NEXT: .cfi_offset %rbx, -16 ; AVX2-NEXT: movq %rdi, %rbx ; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 ; AVX2-NEXT: callq __extendhfsf2@PLT @@ -69,9 +55,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) { ; AVX2-NEXT: callq __truncsfhf2@PLT ; AVX2-NEXT: vpextrw $0, %xmm0, (%rbx) ; AVX2-NEXT: addq $16, %rsp -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 8 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: v_test_canonicalize__half: @@ -112,12 +96,10 @@ entry: ret void } - -define half @complex_canonicalize_fmul_half(half %a, half 
%b) { +define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind { ; SSE-LABEL: complex_canonicalize_fmul_half: ; SSE: # %bb.0: # %entry ; SSE-NEXT: pushq %rax -; SSE-NEXT: .cfi_def_cfa_offset 16 ; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; SSE-NEXT: callq __extendhfsf2@PLT ; SSE-NEXT: movss %xmm0, (%rsp) # 4-byte Spill @@ -147,13 +129,11 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; SSE-NEXT: subss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload ; SSE-NEXT: callq __truncsfhf2@PLT ; SSE-NEXT: popq %rax -; SSE-NEXT: .cfi_def_cfa_offset 8 ; SSE-NEXT: retq ; ; AVX1-LABEL: complex_canonicalize_fmul_half: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: pushq %rax -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; AVX1-NEXT: callq __extendhfsf2@PLT ; AVX1-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill @@ -182,13 +162,11 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; AVX1-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX1-NEXT: callq __truncsfhf2@PLT ; AVX1-NEXT: popq %rax -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX2-LABEL: complex_canonicalize_fmul_half: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: pushq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; AVX2-NEXT: callq __extendhfsf2@PLT ; AVX2-NEXT: vmovss %xmm0, (%rsp) # 4-byte Spill @@ -217,7 +195,6 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) { ; AVX2-NEXT: vsubss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload ; AVX2-NEXT: callq __truncsfhf2@PLT ; AVX2-NEXT: popq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 8 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: complex_canonicalize_fmul_half: @@ -294,6 +271,3 @@ entry: } declare half @llvm.canonicalize.f16(half) -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; AVX: {{.*}} -; SSE2: {{.*}} diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll index fe399ce127498..add8dbce812c1 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -1,10 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5 -; RUN: llc < %s -mtriple=i686-- --mattr=-sse2 | FileCheck %s -check-prefixes=SSE,SSE1 -; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2 -; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1 -; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2 -; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F -; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW +; RUN: llc -mtriple=i686-- --mattr=-sse2 < %s | FileCheck %s -check-prefixes=SSE1 +; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE2 +; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX1 +; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX1,AVX2 +; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX1,AVX512F define float @canon_fp32_varargsf32(float %a) { ; SSE1-LABEL: canon_fp32_varargsf32: @@ -18,10 +17,11 @@ define float @canon_fp32_varargsf32(float %a) { ; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: retq ; -; AVX-LABEL: canon_fp32_varargsf32: -; AVX: # %bb.0: -; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX1-LABEL: canon_fp32_varargsf32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: retq + %canonicalized = call float @llvm.canonicalize.f32(float %a) ret 
float %canonicalized } @@ -41,12 +41,12 @@ define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) { ; SSE2-NEXT: fmulp %st, %st(1) ; SSE2-NEXT: retq ; -; AVX-LABEL: canon_fp32_varargsf80: -; AVX: # %bb.0: -; AVX-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX-NEXT: fld1 -; AVX-NEXT: fmulp %st, %st(1) -; AVX-NEXT: retq +; AVX1-LABEL: canon_fp32_varargsf80: +; AVX1: # %bb.0: +; AVX1-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX1-NEXT: fld1 +; AVX1-NEXT: fmulp %st, %st(1) +; AVX1-NEXT: retq %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a) ret x86_fp80 %canonicalized } @@ -78,18 +78,18 @@ define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) { ; SSE2-NEXT: fsubp %st, %st(1) ; SSE2-NEXT: retq ; -; AVX-LABEL: complex_canonicalize_fmul_x86_fp80: -; AVX: # %bb.0: # %entry -; AVX-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX-NEXT: fldt {{[0-9]+}}(%rsp) -; AVX-NEXT: fsub %st(1), %st -; AVX-NEXT: fld %st(0) -; AVX-NEXT: fadd %st(2), %st -; AVX-NEXT: fsubp %st, %st(1) -; AVX-NEXT: fld1 -; AVX-NEXT: fmulp %st, %st(1) -; AVX-NEXT: fsubp %st, %st(1) -; AVX-NEXT: retq +; AVX1-LABEL: complex_canonicalize_fmul_x86_fp80: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX1-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX1-NEXT: fsub %st(1), %st +; AVX1-NEXT: fld %st(0) +; AVX1-NEXT: fadd %st(2), %st +; AVX1-NEXT: fsubp %st, %st(1) +; AVX1-NEXT: fld1 +; AVX1-NEXT: fmulp %st, %st(1) +; AVX1-NEXT: fsubp %st, %st(1) +; AVX1-NEXT: retq entry: %mul1 = fsub x86_fp80 %a, %b @@ -145,14 +145,6 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; SSE2-NEXT: movapd %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; AVX1-LABEL: canonicalize_fp64: -; AVX1: # %bb.0: # %start -; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; ; AVX2-LABEL: canonicalize_fp64: ; AVX2: # %bb.0: # %start ; AVX2-NEXT: vmaxsd %xmm0, 
%xmm1, %xmm2 @@ -168,14 +160,6 @@ define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 { ; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} ; AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canonicalize_fp64: -; AVX512BW: # %bb.0: # %start -; AVX512BW-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; AVX512BW-NEXT: vcmpunordsd %xmm0, %xmm0, %k1 -; AVX512BW-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1} -; AVX512BW-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 -; AVX512BW-NEXT: retq start: %c = fcmp olt double %a, %b @@ -231,14 +215,6 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: retq ; -; AVX1-LABEL: canonicalize_fp32: -; AVX1: # %bb.0: # %start -; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 -; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: retq -; ; AVX2-LABEL: canonicalize_fp32: ; AVX2: # %bb.0: # %start ; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2 @@ -254,14 +230,6 @@ define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 { ; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} ; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 ; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: canonicalize_fp32: -; AVX512BW: # %bb.0: # %start -; AVX512BW-NEXT: vmaxss %xmm0, %xmm1, %xmm2 -; AVX512BW-NEXT: vcmpunordss %xmm0, %xmm0, %k1 -; AVX512BW-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1} -; AVX512BW-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 -; AVX512BW-NEXT: retq start: %cc = fcmp olt float %aa, %bb @@ -288,12 +256,12 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { ; SSE2-NEXT: movss %xmm0, (%rdi) ; SSE2-NEXT: retq ; -; AVX-LABEL: v_test_canonicalize_var_f32: -; AVX: # %bb.0: -; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX-NEXT: vmulss (%rdi), 
%xmm0, %xmm0 -; AVX-NEXT: vmovss %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: v_test_canonicalize_var_f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; AVX1-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovss %xmm0, (%rdi) +; AVX1-NEXT: retq %val = load float, float addrspace(1)* %out %canonicalized = call float @llvm.canonicalize.f32(float %val) store float %canonicalized, float addrspace(1)* %out @@ -318,13 +286,14 @@ define void @v_test_canonicalize_x86_fp80(x86_fp80 addrspace(1)* %out) #1 { ; SSE2-NEXT: fstpt (%rdi) ; SSE2-NEXT: retq ; -; AVX-LABEL: v_test_canonicalize_x86_fp80: -; AVX: # %bb.0: -; AVX-NEXT: fldt (%rdi) -; AVX-NEXT: fld1 -; AVX-NEXT: fmulp %st, %st(1) -; AVX-NEXT: fstpt (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: v_test_canonicalize_x86_fp80: +; AVX1: # %bb.0: +; AVX1-NEXT: fldt (%rdi) +; AVX1-NEXT: fld1 +; AVX1-NEXT: fmulp %st, %st(1) +; AVX1-NEXT: fstpt (%rdi) +; AVX1-NEXT: retq + %val = load x86_fp80, x86_fp80 addrspace(1)* %out %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val) store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out @@ -347,12 +316,13 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { ; SSE2-NEXT: movsd %xmm0, (%rdi) ; SSE2-NEXT: retq ; -; AVX-LABEL: v_test_canonicalize_var_f64: -; AVX: # %bb.0: -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 -; AVX-NEXT: vmovsd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: v_test_canonicalize_var_f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; AVX1-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovsd %xmm0, (%rdi) +; AVX1-NEXT: retq + %val = load double, double addrspace(1)* %out %canonicalized = call double @llvm.canonicalize.f64(double %val) store double %canonicalized, double addrspace(1)* %out @@ -373,11 +343,12 @@ define void @canonicalize_undef(double addrspace(1)* %out) { ; SSE2-NEXT: movq %rax, (%rdi) ; SSE2-NEXT: 
retq ; -; AVX-LABEL: canonicalize_undef: -; AVX: # %bb.0: -; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 -; AVX-NEXT: movq %rax, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: canonicalize_undef: +; AVX1: # %bb.0: +; AVX1-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000 +; AVX1-NEXT: movq %rax, (%rdi) +; AVX1-NEXT: retq + %canonicalized = call double @llvm.canonicalize.f64(double undef) store double %canonicalized, double addrspace(1)* %out ret void @@ -386,5 +357,4 @@ define void @canonicalize_undef(double addrspace(1)* %out) { declare double @llvm.canonicalize.f64(double) declare float @llvm.canonicalize.f32(float) declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; SSE: {{.*}} + From 7a7767748b01df4b90870265185654d503e3d3dd Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Wed, 18 Sep 2024 13:21:58 +0200 Subject: [PATCH 08/10] handling vector inputs and moving to lowering --- llvm/lib/Target/X86/X86ISelLowering.cpp | 74 +++-- .../CodeGen/X86/canonicalize-vars-f16-type.ll | 152 ++++++++- llvm/test/CodeGen/X86/canonicalize-vars.ll | 298 +++++++++++++++++- 3 files changed, 489 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 01275df21ebde..01ca105a06963 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -331,9 +331,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); } + setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom); if (Subtarget.is64Bit()) { setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom); } } @@ -345,6 +347,9 @@ 
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.hasSSE2()) { setOperationAction(ISD::BITCAST , MVT::f32 , Expand); setOperationAction(ISD::BITCAST , MVT::i32 , Expand); + setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom); if (Subtarget.is64Bit()) { setOperationAction(ISD::BITCAST , MVT::f64 , Expand); // Without SSE, i64->f64 goes through memory. @@ -708,6 +713,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote); setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); @@ -924,6 +930,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (isTypeLegal(MVT::f80)) { setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom); } setOperationAction(ISD::SETCC, MVT::f128, Custom); @@ -1042,6 +1049,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // No operations on x86mmx supported, everything uses intrinsics. } + /* + MVT::v4f32, MVT::v8f32, MVT::v16f32, + MVT::v2f64, MVT::v4f64, MVT::v8f64 + */ + if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? 
&X86::VR128XRegClass : &X86::VR128RegClass); @@ -1057,9 +1069,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom); setOperationAction(ISD::LOAD, MVT::v2f32, Custom); setOperationAction(ISD::STORE, MVT::v2f32, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom); setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); @@ -1120,6 +1134,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMULO, MVT::v2i32, Custom); setOperationAction(ISD::FNEG, MVT::v2f64, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom); setOperationAction(ISD::FABS, MVT::v2f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); @@ -1452,6 +1467,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMAXIMUM, VT, Custom); setOperationAction(ISD::FMINIMUM, VT, Custom); + setOperationAction(ISD::FCANONICALIZE, VT, Custom); } setOperationAction(ISD::LRINT, MVT::v8f32, Custom); @@ -1717,6 +1733,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom); + setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom); // There is no byte sized k-register load or store without AVX512DQ. 
if (!Subtarget.hasDQI()) { @@ -1796,6 +1815,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::STRICT_FMA, VT, Legal); setOperationAction(ISD::FCOPYSIGN, VT, Custom); + setOperationAction(ISD::FCANONICALIZE, VT, Custom); } setOperationAction(ISD::LRINT, MVT::v16f32, Subtarget.hasDQI() ? Legal : Custom); @@ -2559,7 +2579,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::STRICT_FMA, ISD::FMINNUM, ISD::FMAXNUM, - ISD::FCANONICALIZE, ISD::SUB, ISD::LOAD, ISD::LRINT, @@ -32407,6 +32426,24 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget, return Op; } +static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) { + SDNode *N = Op.getNode(); + SDValue Operand = N->getOperand(0); + EVT VT = Operand.getValueType(); + SDLoc dl(N); + + SDValue One = DAG.getConstantFP(1.0, dl, VT); + + // TODO: Fix Crash for bf16 when generating strict_fmul as it + // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0, + // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft + // promote this operator's result! 
+ SDValue Chain = DAG.getEntryNode(); + SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, + {Chain, Operand, One}); + return StrictFmul; +} + static StringRef getInstrStrFromOpNo(const SmallVectorImpl &AsmStrs, unsigned OpNo) { const APInt Operand(32, OpNo); @@ -32545,6 +32582,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); case ISD::FSHL: case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG); + case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG); case ISD::STRICT_SINT_TO_FP: case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::STRICT_UINT_TO_FP: @@ -58160,24 +58198,23 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue combineCanonicalize(SDNode *N, SelectionDAG &DAG) { - SDValue Operand = N->getOperand(0); - EVT VT = Operand.getValueType(); - SDLoc dl(N); +// static SDValue combineCanonicalize(SDNode *N, SelectionDAG &DAG) { +// SDValue Operand = N->getOperand(0); +// EVT VT = Operand.getValueType(); +// SDLoc dl(N); - // Canonicalize scalar variable FP Nodes. - SDValue One = - DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32)); - // TODO: Fix Crash for bf16 when generating strict_fmul as it - // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0, - // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft - // promote this operator's result! - SDValue Chain = DAG.getEntryNode(); - SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, - {Chain, One, Operand}); - return StrictFmul; - // TODO : Hanlde vectors. 
-} +// SDValue One = DAG.getConstantFP(1.0, dl, VT); + +// // TODO: Fix Crash for bf16 when generating strict_fmul as it +// // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul +// t0, +// // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft +// // promote this operator's result! +// SDValue Chain = DAG.getEntryNode(); +// SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, +// {Chain, Operand, One}); +// return StrictFmul; +// } SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -58218,7 +58255,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); - case ISD::FCANONICALIZE: return combineCanonicalize(N, DAG); case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget); case ISD::AVGCEILS: case ISD::AVGCEILU: diff --git a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll index 3a1947a62cb6d..52048a0a2065b 100644 --- a/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll @@ -66,7 +66,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind { ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512F-NEXT: vmovd %eax, %xmm1 ; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512F-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vmulss %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -82,7 +82,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind { ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512BW-NEXT: vmovd %eax, %xmm1 ; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 -; AVX512BW-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vmulss %xmm0, 
%xmm1, %xmm0 ; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] ; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -218,7 +218,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind { ; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax ; AVX512F-NEXT: vmovd %eax, %xmm2 ; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512F-NEXT: vmulss %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmulss %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] ; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -250,7 +250,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind { ; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm2 ; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 -; AVX512BW-NEXT: vmulss %xmm0, %xmm2, %xmm0 +; AVX512BW-NEXT: vmulss %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] ; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -270,4 +270,146 @@ entry: ret half %result } -declare half @llvm.canonicalize.f16(half) +define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind { +; SSE-LABEL: v_test_canonicalize_v2half: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rbx +; SSE-NEXT: subq $48, %rsp +; SSE-NEXT: movq %rdi, %rbx +; SSE-NEXT: pinsrw $0, 2(%rdi), %xmm0 +; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: pinsrw $0, (%rdi), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload +; SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; SSE-NEXT: mulss %xmm0, %xmm1 +; SSE-NEXT: 
movaps %xmm1, %xmm0 +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: callq __extendhfsf2@PLT +; SSE-NEXT: mulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE-NEXT: callq __truncsfhf2@PLT +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: movw %ax, 2(%rbx) +; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE-NEXT: pextrw $0, %xmm0, %eax +; SSE-NEXT: movw %ax, (%rbx) +; SSE-NEXT: addq $48, %rsp +; SSE-NEXT: popq %rbx +; SSE-NEXT: retq +; +; AVX1-LABEL: v_test_canonicalize_v2half: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: pushq %rbx +; AVX1-NEXT: subq $48, %rsp +; AVX1-NEXT: movq %rdi, %rbx +; AVX1-NEXT: vpinsrw $0, 2(%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX1-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX1-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: callq __extendhfsf2@PLT +; AVX1-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX1-NEXT: callq __truncsfhf2@PLT +; AVX1-NEXT: vpextrw $0, %xmm0, 2(%rbx) +; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX1-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX1-NEXT: addq $48, %rsp +; AVX1-NEXT: popq %rbx +; AVX1-NEXT: retq +; +; AVX2-LABEL: v_test_canonicalize_v2half: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: subq $48, %rsp 
+; AVX2-NEXT: movq %rdi, %rbx +; AVX2-NEXT: vpinsrw $0, 2(%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX2-NEXT: vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill +; AVX2-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX2-NEXT: callq __extendhfsf2@PLT +; AVX2-NEXT: vmulss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVX2-NEXT: callq __truncsfhf2@PLT +; AVX2-NEXT: vpextrw $0, %xmm0, 2(%rbx) +; AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX2-NEXT: vpextrw $0, %xmm0, (%rbx) +; AVX2-NEXT: addq $48, %rsp +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: retq +; +; AVX512F-LABEL: v_test_canonicalize_v2half: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512F-NEXT: vmulss %xmm1, %xmm2, %xmm2 +; AVX512F-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512F-NEXT: vmulss %xmm1, %xmm0, 
%xmm0 +; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3] +; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: v_test_canonicalize_v2half: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512BW-NEXT: movzwl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX512BW-NEXT: vmovd %eax, %xmm1 +; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vcvtph2ps %xmm2, %xmm2 +; AVX512BW-NEXT: vmulss %xmm1, %xmm2, %xmm2 +; AVX512BW-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; AVX512BW-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3] +; AVX512BW-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; AVX512BW-NEXT: vmovd %xmm2, %eax +; AVX512BW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 +; AVX512BW-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3] +; AVX512BW-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: retq +entry: + %val = load <2 x half>, <2 x half> addrspace(1)* %out + %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val) + store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out + ret void +} + diff --git a/llvm/test/CodeGen/X86/canonicalize-vars.ll b/llvm/test/CodeGen/X86/canonicalize-vars.ll index add8dbce812c1..13ea53389411b 100644 --- 
a/llvm/test/CodeGen/X86/canonicalize-vars.ll +++ b/llvm/test/CodeGen/X86/canonicalize-vars.ll @@ -251,15 +251,15 @@ define void @v_test_canonicalize_var_f32(float addrspace(1)* %out) #1 { ; ; SSE2-LABEL: v_test_canonicalize_var_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; SSE2-NEXT: mulss (%rdi), %xmm0 +; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: movss %xmm0, (%rdi) ; SSE2-NEXT: retq ; ; AVX1-LABEL: v_test_canonicalize_var_f32: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] -; AVX1-NEXT: vmulss (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vmovss %xmm0, (%rdi) ; AVX1-NEXT: retq %val = load float, float addrspace(1)* %out @@ -311,15 +311,15 @@ define void @v_test_canonicalize_var_f64(double addrspace(1)* %out) #1 { ; ; SSE2-LABEL: v_test_canonicalize_var_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; SSE2-NEXT: mulsd (%rdi), %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: movsd %xmm0, (%rdi) ; SSE2-NEXT: retq ; ; AVX1-LABEL: v_test_canonicalize_var_f64: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; AVX1-NEXT: vmulsd (%rdi), %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vmovsd %xmm0, (%rdi) ; AVX1-NEXT: retq @@ -354,7 +354,283 @@ define void @canonicalize_undef(double addrspace(1)* %out) { ret void } -declare double @llvm.canonicalize.f64(double) -declare float @llvm.canonicalize.f32(float) -declare x86_fp80 @llvm.canonicalize.f80(x86_fp80) +define <4 x float> @canon_fp32_varargsv4f32(<4 x float> %a) { +; SSE1-LABEL: canon_fp32_varargsv4f32: +; SSE1: # %bb.0: +; 
SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fld1 +; SSE1-NEXT: fld %st(0) +; SSE1-NEXT: fmuls {{[0-9]+}}(%esp) +; SSE1-NEXT: fld %st(1) +; SSE1-NEXT: fmuls {{[0-9]+}}(%esp) +; SSE1-NEXT: fld %st(2) +; SSE1-NEXT: fmuls {{[0-9]+}}(%esp) +; SSE1-NEXT: fxch %st(3) +; SSE1-NEXT: fmuls {{[0-9]+}}(%esp) +; SSE1-NEXT: fstps 12(%eax) +; SSE1-NEXT: fxch %st(2) +; SSE1-NEXT: fstps 8(%eax) +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: fstps 4(%eax) +; SSE1-NEXT: fstps (%eax) +; SSE1-NEXT: retl $4 +; +; SSE2-LABEL: canon_fp32_varargsv4f32: +; SSE2: # %bb.0: +; SSE2-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; AVX2-LABEL: canon_fp32_varargsv4f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp32_varargsv4f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512F-NEXT: vmulps %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq + %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a) + ret <4 x float> %canonicalized +} + +define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) { +; SSE1-LABEL: canon_fp64_varargsv4f64: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fld1 +; SSE1-NEXT: fld %st(0) +; SSE1-NEXT: fmull {{[0-9]+}}(%esp) +; SSE1-NEXT: fld %st(1) +; SSE1-NEXT: fmull {{[0-9]+}}(%esp) +; SSE1-NEXT: fld %st(2) +; SSE1-NEXT: fmull {{[0-9]+}}(%esp) +; SSE1-NEXT: fxch %st(3) +; SSE1-NEXT: fmull {{[0-9]+}}(%esp) +; SSE1-NEXT: fstpl 24(%eax) +; SSE1-NEXT: fxch %st(2) +; SSE1-NEXT: fstpl 16(%eax) +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: fstpl 8(%eax) +; SSE1-NEXT: fstpl (%eax) +; SSE1-NEXT: retl $4 +; +; SSE2-LABEL: canon_fp64_varargsv4f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0] +; SSE2-NEXT: mulpd %xmm2, %xmm0 +; SSE2-NEXT: mulpd %xmm2, %xmm1 +; SSE2-NEXT: retq +; +; AVX2-LABEL: 
canon_fp64_varargsv4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: canon_fp64_varargsv4f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512F-NEXT: vmulpd %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq + %canonicalized = call <4 x double> @llvm.canonicalize.v4f32(<4 x double> %a) + ret <4 x double> %canonicalized +} + +define <2 x x86_fp80> @canon_fp80_varargsv2fp80(<2 x x86_fp80> %a) { +; SSE1-LABEL: canon_fp80_varargsv2fp80: +; SSE1: # %bb.0: +; SSE1-NEXT: fldt {{[0-9]+}}(%esp) +; SSE1-NEXT: fldt {{[0-9]+}}(%esp) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmul %st, %st(1) +; SSE1-NEXT: fmulp %st, %st(2) +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: retl +; +; SSE2-LABEL: canon_fp80_varargsv2fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fldt {{[0-9]+}}(%rsp) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmul %st, %st(1) +; SSE2-NEXT: fmulp %st, %st(2) +; SSE2-NEXT: fxch %st(1) +; SSE2-NEXT: retq +; +; AVX1-LABEL: canon_fp80_varargsv2fp80: +; AVX1: # %bb.0: +; AVX1-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX1-NEXT: fldt {{[0-9]+}}(%rsp) +; AVX1-NEXT: fld1 +; AVX1-NEXT: fmul %st, %st(1) +; AVX1-NEXT: fmulp %st, %st(2) +; AVX1-NEXT: fxch %st(1) +; AVX1-NEXT: retq + %canonicalized = call <2 x x86_fp80> @llvm.canonicalize.v2f80(<2 x x86_fp80> %a) + ret <2 x x86_fp80> %canonicalized +} + +define void @vec_canonicalize_var_v4f32(<4 x float> addrspace(1)* %out) #1 { +; SSE1-LABEL: vec_canonicalize_var_v4f32: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fld1 +; SSE1-NEXT: fld %st(0) +; SSE1-NEXT: fmuls (%eax) +; SSE1-NEXT: fld %st(1) +; SSE1-NEXT: fmuls 4(%eax) +; SSE1-NEXT: fld %st(2) +; SSE1-NEXT: fmuls 8(%eax) +; SSE1-NEXT: fxch %st(3) +; SSE1-NEXT: fmuls 12(%eax) +; SSE1-NEXT: fstps 12(%eax) +; SSE1-NEXT: fxch %st(2) +; SSE1-NEXT: fstps 8(%eax) +; SSE1-NEXT: fxch %st(1) 
+; SSE1-NEXT: fstps 4(%eax) +; SSE1-NEXT: fstps (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: vec_canonicalize_var_v4f32: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps (%rdi), %xmm0 +; SSE2-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: movaps %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; AVX2-LABEL: vec_canonicalize_var_v4f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX2-NEXT: vmulps (%rdi), %xmm0, %xmm0 +; AVX2-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: vec_canonicalize_var_v4f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512F-NEXT: vmulps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vmovaps %xmm0, (%rdi) +; AVX512F-NEXT: retq + %val = load <4 x float>, <4 x float> addrspace(1)* %out + %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %val) + store <4 x float> %canonicalized, <4 x float> addrspace(1)* %out + ret void +} + +define void @vec_canonicalize_var_v4f64(<4 x double> addrspace(1)* %out) #1 { +; SSE1-LABEL: vec_canonicalize_var_v4f64: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fld1 +; SSE1-NEXT: fld %st(0) +; SSE1-NEXT: fmull (%eax) +; SSE1-NEXT: fld %st(1) +; SSE1-NEXT: fmull 8(%eax) +; SSE1-NEXT: fld %st(2) +; SSE1-NEXT: fmull 16(%eax) +; SSE1-NEXT: fxch %st(3) +; SSE1-NEXT: fmull 24(%eax) +; SSE1-NEXT: fstpl 24(%eax) +; SSE1-NEXT: fxch %st(2) +; SSE1-NEXT: fstpl 16(%eax) +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: fstpl 8(%eax) +; SSE1-NEXT: fstpl (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: vec_canonicalize_var_v4f64: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0] +; SSE2-NEXT: movapd 16(%rdi), %xmm1 +; SSE2-NEXT: mulpd %xmm0, %xmm1 +; SSE2-NEXT: mulpd (%rdi), %xmm0 +; SSE2-NEXT: movapd %xmm0, (%rdi) +; SSE2-NEXT: movapd %xmm1, 16(%rdi) +; SSE2-NEXT: retq +; +; AVX2-LABEL: vec_canonicalize_var_v4f64: +; AVX2: # %bb.0: +; AVX2-NEXT: vbroadcastsd 
{{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vmovapd %ymm0, (%rdi) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: vec_canonicalize_var_v4f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] +; AVX512F-NEXT: vmulpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: vmovapd %ymm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq + %val = load <4 x double>, <4 x double> addrspace(1)* %out + %canonicalized = call <4 x double> @llvm.canonicalize.v4f32(<4 x double> %val) + store <4 x double> %canonicalized, <4 x double> addrspace(1)* %out + ret void +} +define void @vec_canonicalize_x86_fp80(<4 x x86_fp80> addrspace(1)* %out) #1 { +; SSE1-LABEL: vec_canonicalize_x86_fp80: +; SSE1: # %bb.0: +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: fldt 30(%eax) +; SSE1-NEXT: fldt 20(%eax) +; SSE1-NEXT: fldt 10(%eax) +; SSE1-NEXT: fldt (%eax) +; SSE1-NEXT: fld1 +; SSE1-NEXT: fmul %st, %st(1) +; SSE1-NEXT: fmul %st, %st(2) +; SSE1-NEXT: fmul %st, %st(3) +; SSE1-NEXT: fmulp %st, %st(4) +; SSE1-NEXT: fxch %st(3) +; SSE1-NEXT: fstpt 30(%eax) +; SSE1-NEXT: fxch %st(1) +; SSE1-NEXT: fstpt 20(%eax) +; SSE1-NEXT: fstpt 10(%eax) +; SSE1-NEXT: fstpt (%eax) +; SSE1-NEXT: retl +; +; SSE2-LABEL: vec_canonicalize_x86_fp80: +; SSE2: # %bb.0: +; SSE2-NEXT: fldt 30(%rdi) +; SSE2-NEXT: fldt 20(%rdi) +; SSE2-NEXT: fldt 10(%rdi) +; SSE2-NEXT: fldt (%rdi) +; SSE2-NEXT: fld1 +; SSE2-NEXT: fmul %st, %st(1) +; SSE2-NEXT: fmul %st, %st(2) +; SSE2-NEXT: fmul %st, %st(3) +; SSE2-NEXT: fmulp %st, %st(4) +; SSE2-NEXT: fxch %st(3) +; SSE2-NEXT: fstpt 30(%rdi) +; SSE2-NEXT: fxch %st(1) +; SSE2-NEXT: fstpt 20(%rdi) +; SSE2-NEXT: fstpt 10(%rdi) +; SSE2-NEXT: fstpt (%rdi) +; SSE2-NEXT: retq +; +; AVX1-LABEL: vec_canonicalize_x86_fp80: +; AVX1: # %bb.0: +; AVX1-NEXT: fldt 30(%rdi) +; AVX1-NEXT: fldt 20(%rdi) +; AVX1-NEXT: fldt 10(%rdi) +; AVX1-NEXT: fldt (%rdi) +; AVX1-NEXT: fld1 +; 
AVX1-NEXT: fmul %st, %st(1) +; AVX1-NEXT: fmul %st, %st(2) +; AVX1-NEXT: fmul %st, %st(3) +; AVX1-NEXT: fmulp %st, %st(4) +; AVX1-NEXT: fxch %st(3) +; AVX1-NEXT: fstpt 30(%rdi) +; AVX1-NEXT: fxch %st(1) +; AVX1-NEXT: fstpt 20(%rdi) +; AVX1-NEXT: fstpt 10(%rdi) +; AVX1-NEXT: fstpt (%rdi) +; AVX1-NEXT: retq + %val = load <4 x x86_fp80>, <4 x x86_fp80> addrspace(1)* %out + %canonicalized = call <4 x x86_fp80> @llvm.canonicalize.f80(<4 x x86_fp80> %val) + store <4 x x86_fp80> %canonicalized, <4 x x86_fp80> addrspace(1)* %out + ret void +} From fa044097f5b6a6d07c82d223e02571c0a4b3310c Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Wed, 18 Sep 2024 13:25:00 +0200 Subject: [PATCH 09/10] remove the rogue comment --- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0e5a09db6f3bb..896dc491b51d7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1057,11 +1057,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // No operations on x86mmx supported, everything uses intrinsics. } - /* - MVT::v4f32, MVT::v8f32, MVT::v16f32, - MVT::v2f64, MVT::v4f64, MVT::v8f64 - */ - if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? 
&X86::VR128XRegClass : &X86::VR128RegClass); From ad860026bf88ab38a7609cc973866f017e028427 Mon Sep 17 00:00:00 2001 From: Pawan Anil Nirpal Date: Wed, 18 Sep 2024 13:26:39 +0200 Subject: [PATCH 10/10] remove the rogue comment --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 896dc491b51d7..0a3fa7a4a8fda 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58517,24 +58517,6 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// static SDValue combineCanonicalize(SDNode *N, SelectionDAG &DAG) { -// SDValue Operand = N->getOperand(0); -// EVT VT = Operand.getValueType(); -// SDLoc dl(N); - -// SDValue One = DAG.getConstantFP(1.0, dl, VT); - -// // TODO: Fix Crash for bf16 when generating strict_fmul as it -// // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul -// t0, -// // ConstantFP:bf16, t5 LLVM ERROR: Do not know how to soft -// // promote this operator's result! -// SDValue Chain = DAG.getEntryNode(); -// SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other}, -// {Chain, Operand, One}); -// return StrictFmul; -// } - SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG;