Skip to content

Commit 55432ea

Browse files
committed
[CodeGen] More consistently expand float ops by default
These float operations were expanded for scalar f32/f64/f128, but not for f16 and, more problematically, not for vectors. A small subset of them was separately set to expand for vectors. Change these to always expand by default, and adjust targets to mark them as legal where necessary instead. This is a much safer default, and avoids unnecessary legalization failures caused by a target failing to manually mark them as expand. Fixes llvm#110753.
1 parent da7ec1e commit 55432ea

File tree

4 files changed

+154
-16
lines changed

4 files changed

+154
-16
lines changed

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
806806
ISD::SDIVFIX, ISD::SDIVFIXSAT,
807807
ISD::UDIVFIX, ISD::UDIVFIXSAT,
808808
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
809-
ISD::IS_FPCLASS},
809+
ISD::IS_FPCLASS, ISD::FCBRT,
810+
ISD::FLOG, ISD::FLOG2,
811+
ISD::FLOG10, ISD::FEXP,
812+
ISD::FEXP2, ISD::FEXP10,
813+
ISD::FFLOOR, ISD::FNEARBYINT,
814+
ISD::FCEIL, ISD::FRINT,
815+
ISD::FTRUNC, ISD::FROUNDEVEN,
816+
ISD::FTAN, ISD::FACOS,
817+
ISD::FASIN, ISD::FATAN,
818+
ISD::FCOSH, ISD::FSINH,
819+
ISD::FTANH, ISD::FATAN2},
810820
VT, Expand);
811821

812822
// Overflow operations default to expand
@@ -856,8 +866,7 @@ void TargetLoweringBase::initActions() {
856866
{ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
857867
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
858868
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
859-
ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
860-
ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
869+
ISD::LLROUND},
861870
VT, Expand);
862871

863872
// Constrained floating-point operations default to expand.
@@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
914923
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
915924
Expand);
916925

917-
// These library functions default to expand.
918-
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
919-
ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
920-
ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
921-
ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
922-
ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
923-
ISD::FATAN2},
924-
{MVT::f32, MVT::f64, MVT::f128}, Expand);
925-
926926
// Insert custom handling default for llvm.canonicalize.*.
927927
setOperationAction(ISD::FCANONICALIZE,
928928
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,8 +391,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
391391
// Library functions. These default to Expand, but we have instructions
392392
// for them.
393393
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
394-
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
395-
MVT::f32, Legal);
394+
ISD::FROUNDEVEN, ISD::FTRUNC},
395+
{MVT::f16, MVT::f32}, Legal);
396+
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
396397

397398
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
398399
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@@ -412,9 +413,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
412413

413414
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
414415

415-
if (Subtarget->has16BitInsts())
416+
if (Subtarget->has16BitInsts()) {
416417
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
417-
else {
418+
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
419+
} else {
418420
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
419421
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
420422
}

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
370370
setOperationAction(ISD::FMINNUM, VT, Legal);
371371
setOperationAction(ISD::FMAXNUM, VT, Legal);
372372
setOperationAction(ISD::FROUND, VT, Legal);
373+
setOperationAction(ISD::FROUNDEVEN, VT, Legal);
374+
setOperationAction(ISD::FRINT, VT, Legal);
375+
setOperationAction(ISD::FTRUNC, VT, Legal);
376+
setOperationAction(ISD::FFLOOR, VT, Legal);
377+
setOperationAction(ISD::FCEIL, VT, Legal);
373378
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
374379
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
375380
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
15071512
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
15081513

15091514
setOperationAction(ISD::FROUND, MVT::f16, Legal);
1515+
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
1516+
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
1517+
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
1518+
setOperationAction(ISD::FRINT, MVT::f16, Legal);
1519+
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
1520+
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
15101521
}
15111522

15121523
if (Subtarget->hasNEON()) {
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
3+
4+
define void @test(ptr %p1, ptr %p2) {
5+
; CHECK-LABEL: test:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: mflr 0
8+
; CHECK-NEXT: stdu 1, -224(1)
9+
; CHECK-NEXT: std 0, 240(1)
10+
; CHECK-NEXT: .cfi_def_cfa_offset 224
11+
; CHECK-NEXT: .cfi_offset lr, 16
12+
; CHECK-NEXT: .cfi_offset r27, -40
13+
; CHECK-NEXT: .cfi_offset r28, -32
14+
; CHECK-NEXT: .cfi_offset r29, -24
15+
; CHECK-NEXT: .cfi_offset r30, -16
16+
; CHECK-NEXT: .cfi_offset v24, -176
17+
; CHECK-NEXT: .cfi_offset v25, -160
18+
; CHECK-NEXT: .cfi_offset v26, -144
19+
; CHECK-NEXT: .cfi_offset v27, -128
20+
; CHECK-NEXT: .cfi_offset v28, -112
21+
; CHECK-NEXT: .cfi_offset v29, -96
22+
; CHECK-NEXT: .cfi_offset v30, -80
23+
; CHECK-NEXT: .cfi_offset v31, -64
24+
; CHECK-NEXT: li 5, 48
25+
; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
26+
; CHECK-NEXT: li 27, 16
27+
; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
28+
; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
29+
; CHECK-NEXT: li 29, 32
30+
; CHECK-NEXT: li 28, 48
31+
; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
32+
; CHECK-NEXT: stvx 24, 1, 5 # 16-byte Folded Spill
33+
; CHECK-NEXT: li 5, 64
34+
; CHECK-NEXT: mr 30, 4
35+
; CHECK-NEXT: stvx 25, 1, 5 # 16-byte Folded Spill
36+
; CHECK-NEXT: li 5, 80
37+
; CHECK-NEXT: stvx 26, 1, 5 # 16-byte Folded Spill
38+
; CHECK-NEXT: li 5, 96
39+
; CHECK-NEXT: lxvd2x 58, 0, 3
40+
; CHECK-NEXT: stvx 27, 1, 5 # 16-byte Folded Spill
41+
; CHECK-NEXT: li 5, 112
42+
; CHECK-NEXT: lxvd2x 59, 3, 27
43+
; CHECK-NEXT: stvx 28, 1, 5 # 16-byte Folded Spill
44+
; CHECK-NEXT: li 5, 128
45+
; CHECK-NEXT: stvx 29, 1, 5 # 16-byte Folded Spill
46+
; CHECK-NEXT: li 5, 144
47+
; CHECK-NEXT: stvx 30, 1, 5 # 16-byte Folded Spill
48+
; CHECK-NEXT: li 5, 160
49+
; CHECK-NEXT: lxvd2x 62, 3, 28
50+
; CHECK-NEXT: stvx 31, 1, 5 # 16-byte Folded Spill
51+
; CHECK-NEXT: lxvd2x 63, 3, 29
52+
; CHECK-NEXT: xxswapd 57, 58
53+
; CHECK-NEXT: xxswapd 1, 59
54+
; CHECK-NEXT: xxswapd 60, 62
55+
; CHECK-NEXT: xxswapd 61, 63
56+
; CHECK-NEXT: bl roundeven
57+
; CHECK-NEXT: nop
58+
; CHECK-NEXT: xxswapd 56, 1
59+
; CHECK-NEXT: xxlor 1, 59, 59
60+
; CHECK-NEXT: bl roundeven
61+
; CHECK-NEXT: nop
62+
; CHECK-NEXT: xxswapd 0, 1
63+
; CHECK-NEXT: xxlor 1, 60, 60
64+
; CHECK-NEXT: xxmrgld 59, 0, 56
65+
; CHECK-NEXT: bl roundeven
66+
; CHECK-NEXT: nop
67+
; CHECK-NEXT: xxswapd 60, 1
68+
; CHECK-NEXT: xxlor 1, 62, 62
69+
; CHECK-NEXT: bl roundeven
70+
; CHECK-NEXT: nop
71+
; CHECK-NEXT: xxswapd 0, 1
72+
; CHECK-NEXT: xxlor 1, 61, 61
73+
; CHECK-NEXT: xxmrgld 62, 0, 60
74+
; CHECK-NEXT: bl roundeven
75+
; CHECK-NEXT: nop
76+
; CHECK-NEXT: xxswapd 61, 1
77+
; CHECK-NEXT: xxlor 1, 63, 63
78+
; CHECK-NEXT: bl roundeven
79+
; CHECK-NEXT: nop
80+
; CHECK-NEXT: xxswapd 0, 1
81+
; CHECK-NEXT: xxlor 1, 57, 57
82+
; CHECK-NEXT: xxmrgld 63, 0, 61
83+
; CHECK-NEXT: bl roundeven
84+
; CHECK-NEXT: nop
85+
; CHECK-NEXT: xxswapd 61, 1
86+
; CHECK-NEXT: xxlor 1, 58, 58
87+
; CHECK-NEXT: bl roundeven
88+
; CHECK-NEXT: nop
89+
; CHECK-NEXT: li 3, 160
90+
; CHECK-NEXT: stxvd2x 63, 30, 29
91+
; CHECK-NEXT: xxswapd 0, 1
92+
; CHECK-NEXT: stxvd2x 62, 30, 28
93+
; CHECK-NEXT: stxvd2x 59, 30, 27
94+
; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
95+
; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
96+
; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
97+
; CHECK-NEXT: lvx 31, 1, 3 # 16-byte Folded Reload
98+
; CHECK-NEXT: li 3, 144
99+
; CHECK-NEXT: xxmrgld 0, 0, 61
100+
; CHECK-NEXT: lvx 30, 1, 3 # 16-byte Folded Reload
101+
; CHECK-NEXT: li 3, 128
102+
; CHECK-NEXT: stxvd2x 0, 0, 30
103+
; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
104+
; CHECK-NEXT: lvx 29, 1, 3 # 16-byte Folded Reload
105+
; CHECK-NEXT: li 3, 112
106+
; CHECK-NEXT: lvx 28, 1, 3 # 16-byte Folded Reload
107+
; CHECK-NEXT: li 3, 96
108+
; CHECK-NEXT: lvx 27, 1, 3 # 16-byte Folded Reload
109+
; CHECK-NEXT: li 3, 80
110+
; CHECK-NEXT: lvx 26, 1, 3 # 16-byte Folded Reload
111+
; CHECK-NEXT: li 3, 64
112+
; CHECK-NEXT: lvx 25, 1, 3 # 16-byte Folded Reload
113+
; CHECK-NEXT: li 3, 48
114+
; CHECK-NEXT: lvx 24, 1, 3 # 16-byte Folded Reload
115+
; CHECK-NEXT: addi 1, 1, 224
116+
; CHECK-NEXT: ld 0, 16(1)
117+
; CHECK-NEXT: mtlr 0
118+
; CHECK-NEXT: blr
119+
%v = load <8 x double>, ptr %p1, align 64
120+
%res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
121+
store <8 x double> %res, ptr %p2, align 64
122+
ret void
123+
}
124+
125+
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)

0 commit comments

Comments
 (0)