Skip to content

Commit 7733347

Browse files
authored
Merge pull request #1086 from apple/pr-61468474
[AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs jus…
2 parents 53556b0 + 05f5e2a commit 7733347

File tree

2 files changed

+203
-0
lines changed

2 files changed

+203
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class AArch64InstructionSelector : public InstructionSelector {
6363
// cache it here for each run of the selector.
6464
ProduceNonFlagSettingCondBr =
6565
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
66+
processPHIs(MF);
6667
}
6768

6869
private:
@@ -77,6 +78,9 @@ class AArch64InstructionSelector : public InstructionSelector {
7778
// An early selection function that runs before the selectImpl() call.
7879
bool earlySelect(MachineInstr &I) const;
7980

81+
// Do some preprocessing of G_PHIs before we begin selection.
82+
void processPHIs(MachineFunction &MF);
83+
8084
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
8185

8286
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
@@ -4755,6 +4759,95 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
47554759
}
47564760
}
47574761

4762+
4763+
// Perform fixups on the given PHI instruction's operands to force them all
4764+
// to be the same as the destination regbank.
4765+
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
4766+
const AArch64RegisterBankInfo &RBI) {
4767+
assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
4768+
Register DstReg = MI.getOperand(0).getReg();
4769+
const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
4770+
assert(DstRB && "Expected PHI dst to have regbank assigned");
4771+
MachineIRBuilder MIB(MI);
4772+
4773+
// Go through each operand and ensure it has the same regbank.
4774+
for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4775+
MachineOperand &MO = MI.getOperand(OpIdx);
4776+
if (!MO.isReg())
4777+
continue;
4778+
Register OpReg = MO.getReg();
4779+
const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
4780+
if (RB != DstRB) {
4781+
// Insert a cross-bank copy.
4782+
auto *OpDef = MRI.getVRegDef(OpReg);
4783+
const LLT &Ty = MRI.getType(OpReg);
4784+
MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
4785+
auto Copy = MIB.buildCopy(Ty, OpReg);
4786+
MRI.setRegBank(Copy.getReg(0), *DstRB);
4787+
MO.setReg(Copy.getReg(0));
4788+
}
4789+
}
4790+
}
4791+
4792+
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list first so that the fixups below
  // (which insert new instructions) don't interfere with block iteration.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 32 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogeneous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    //  =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    // Cleared when any operand disqualifies this PHI, so that a partially
    // scanned PHI is never fixed up based on the operands seen before the
    // bail-out.
    bool CanFixup = true;
    for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
      const auto &MO = MI->getOperand(OpIdx);
      // Skip the non-register operands of the PHI (the incoming blocks).
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      // Only scalars narrower than 32 bits are handled here.
      if (!Ty.isValid() || !Ty.isScalar() || Ty.getSizeInBits() >= 32) {
        CanFixup = false;
        break;
      }
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet. Don't try anything.
      if (!RB) {
        CanFixup = false;
        break;
      }

      // Any bank other than GPR is counted as FPR.
      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks, need to fixup.
    if (CanFixup && HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}
4850+
47584851
namespace llvm {
47594852
InstructionSelector *
47604853
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
3+
---
4+
name: test_loop_phi_fpr_to_gpr
5+
alignment: 4
6+
legalized: true
7+
regBankSelected: true
8+
selected: false
9+
failedISel: false
10+
tracksRegLiveness: true
11+
liveins: []
12+
machineFunctionInfo: {}
13+
body: |
14+
; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
15+
; CHECK: bb.0:
16+
; CHECK: successors: %bb.1(0x80000000)
17+
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
18+
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
19+
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
20+
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
21+
; CHECK: bb.1:
22+
; CHECK: successors: %bb.2(0x80000000)
23+
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
24+
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
25+
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
26+
; CHECK: bb.2:
27+
; CHECK: successors: %bb.2(0x80000000)
28+
; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
29+
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
30+
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
31+
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
32+
; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
33+
; CHECK: B %bb.2
34+
bb.0:
35+
successors: %bb.1(0x80000000)
36+
37+
%0:gpr(s1) = G_IMPLICIT_DEF
38+
%4:gpr(p0) = G_IMPLICIT_DEF
39+
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
40+
41+
bb.1:
42+
successors: %bb.2(0x80000000)
43+
44+
%6:gpr(s32) = G_IMPLICIT_DEF
45+
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
46+
%1:gpr(s16) = G_TRUNC %7(s32)
47+
48+
bb.2:
49+
successors: %bb.2(0x80000000)
50+
51+
%3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
52+
%5:fpr(s16) = G_FPTRUNC %8(s32)
53+
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
54+
G_BR %bb.2
55+
56+
...
57+
---
58+
name: test_loop_phi_gpr_to_fpr
59+
alignment: 4
60+
legalized: true
61+
regBankSelected: true
62+
selected: false
63+
failedISel: false
64+
tracksRegLiveness: true
65+
liveins: []
66+
machineFunctionInfo: {}
67+
body: |
68+
; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
69+
; CHECK: bb.0:
70+
; CHECK: successors: %bb.1(0x80000000)
71+
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
72+
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
73+
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
74+
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
75+
; CHECK: bb.1:
76+
; CHECK: successors: %bb.2(0x80000000)
77+
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
78+
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
79+
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
80+
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
81+
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
82+
; CHECK: bb.2:
83+
; CHECK: successors: %bb.2(0x80000000)
84+
; CHECK: [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
85+
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
86+
; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
87+
; CHECK: B %bb.2
88+
bb.0:
89+
successors: %bb.1(0x80000000)
90+
91+
%0:gpr(s1) = G_IMPLICIT_DEF
92+
%4:gpr(p0) = G_IMPLICIT_DEF
93+
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
94+
95+
bb.1:
96+
successors: %bb.2(0x80000000)
97+
98+
%6:gpr(s32) = G_IMPLICIT_DEF
99+
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
100+
%1:gpr(s16) = G_TRUNC %7(s32)
101+
102+
bb.2:
103+
successors: %bb.2(0x80000000)
104+
105+
%3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
106+
%5:fpr(s16) = G_FPTRUNC %8(s32)
107+
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
108+
G_BR %bb.2
109+
110+
...

0 commit comments

Comments
 (0)