Skip to content

[AArch64][GlobalISel] Fixup <32b heterogeneous regbanks of G_PHIs jus… #1086

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class AArch64InstructionSelector : public InstructionSelector {
// cache it here for each run of the selector.
ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
processPHIs(MF);
}

private:
Expand All @@ -77,6 +78,9 @@ class AArch64InstructionSelector : public InstructionSelector {
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I) const;

// Do some preprocessing of G_PHIs before we begin selection.
void processPHIs(MachineFunction &MF);

bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

/// Eliminate same-sized cross-bank copies into stores before selectImpl().
Expand Down Expand Up @@ -4755,6 +4759,95 @@ bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
}
}


/// Rewrite the incoming-value operands of the G_PHI \p MI so that each of
/// them is on the same register bank as the PHI's destination register.
///
/// For every register operand whose bank differs from the destination's bank,
/// a COPY of that operand is built in the block of the operand's defining
/// instruction, the COPY result is assigned the destination bank, and the PHI
/// operand is redirected to the COPY result.
///
/// \param MI  The instruction to fix up; must be a G_PHI (asserted).
/// \param MRI Register info used to look up banks, types and definitions, and
///            to assign the bank of each newly created COPY result.
/// \param RBI Not read by this function; kept so existing callers still
///            compile.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank. Operand 0 is
  // the def; non-register operands (the incoming blocks) are skipped.
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    MachineOperand &MO = MI.getOperand(OpIdx);
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB == DstRB)
      continue;

    // Insert a cross-bank copy.
    auto *OpDef = MRI.getVRegDef(OpReg);
    assert(OpDef && "Expected PHI operand to have a defining instruction");
    const LLT &Ty = MRI.getType(OpReg);
    MachineBasicBlock &OpDefBB = *OpDef->getParent();

    // Any instruction we insert must appear after all PHIs in the block for
    // the block to remain valid MIR. If the operand is itself defined by a PHI
    // that is followed by further PHIs, placing the copy directly after the
    // def would split the PHI group, so move to the first non-PHI instead.
    MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
    if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
      InsertPt = OpDefBB.getFirstNonPHI();
    MIB.setInsertPt(OpDefBB, InsertPt);

    auto Copy = MIB.buildCopy(Ty, OpReg);
    MRI.setRegBank(Copy.getReg(0), *DstRB);
    MO.setReg(Copy.getReg(0));
  }
}

/// Pre-selection pass over all G_PHIs in \p MF that homogenizes the register
/// banks of small (<32 bit) scalar PHIs whose incoming values are split
/// between the GPR bank and another bank.
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Collect the G_PHIs before touching anything: the fixup inserts new
  // instructions, and doing that while walking the blocks could invalidate
  // the iterators we are walking with.
  SmallVector<MachineInstr *, 32> Worklist;
  for (auto &MBB : MF)
    for (MachineInstr &Inst : MBB)
      if (Inst.getOpcode() == TargetOpcode::G_PHI)
        Worklist.emplace_back(&Inst);

  // Work is needed when the operand types are < 32 bit scalars and they are
  // split across fpr/gpr banks. Since all types <32b on gpr end up being
  // assigned gpr32 regclasses, we can end up with PHIs here which try to
  // select between a gpr32 and an fpr16. Ideally RBS shouldn't be selecting
  // heterogeneous regbanks for operands if possible, but we still need to be
  // able to deal with it here.
  //
  // The remedy is to add cross-bank copies so that all operands agree. For
  // simplicity the bank settled on is whatever bank the def operand has:
  //
  // %endbb:
  //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
  //  =>
  // %bb2:
  //   ...
  //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
  //   ...
  // %endbb:
  //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
  //
  // Returns true when the scanned incoming values include at least one on the
  // GPR bank and at least one on some other bank. Scanning stops at the first
  // operand that is not a valid scalar narrower than 32 bits or that has no
  // bank assigned yet; whatever was observed before that point still counts.
  auto HasMixedBanks = [&MRI](const MachineInstr &Phi) {
    bool SawGPR = false;
    bool SawOther = false;
    for (unsigned Idx = 1; Idx < Phi.getNumOperands(); ++Idx) {
      const MachineOperand &Incoming = Phi.getOperand(Idx);
      if (!Incoming.isReg())
        continue; // Skip the incoming-block operands.

      const LLT &Ty = MRI.getType(Incoming.getReg());
      const bool IsSmallScalar =
          Ty.isValid() && Ty.isScalar() && Ty.getSizeInBits() < 32;
      if (!IsSmallScalar)
        break;

      // If for some reason there is no regbank yet, don't look any further.
      const RegisterBank *Bank = MRI.getRegBankOrNull(Incoming.getReg());
      if (!Bank)
        break;

      if (Bank->getID() == AArch64::GPRRegBankID)
        SawGPR = true;
      else
        SawOther = true;
    }
    return SawGPR && SawOther;
  };

  for (MachineInstr *Phi : Worklist)
    if (HasMixedBanks(*Phi))
      fixupPHIOpBanks(*Phi, MRI, RBI);
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
Expand Down
110 changes: 110 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
---
# Loop PHI whose result (%3) is on the gpr bank while its back-edge incoming
# value (%5, produced by G_FPTRUNC in bb.2) is on the fpr bank; the other
# incoming value (%1) is already on gpr. The CHECK lines expect the selected
# PHI to be gpr32 and a gpr32all COPY of the fpr value to appear in bb.2.
name: test_loop_phi_fpr_to_gpr
alignment: 4
legalized: true
regBankSelected: true
selected: false
failedISel: false
tracksRegLiveness: true
liveins: []
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
; CHECK: bb.2:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
; CHECK: B %bb.2
bb.0:
successors: %bb.1(0x80000000)

%0:gpr(s1) = G_IMPLICIT_DEF
%4:gpr(p0) = G_IMPLICIT_DEF
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000

bb.1:
successors: %bb.2(0x80000000)

%6:gpr(s32) = G_IMPLICIT_DEF
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
%1:gpr(s16) = G_TRUNC %7(s32)

bb.2:
successors: %bb.2(0x80000000)

%3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
%5:fpr(s16) = G_FPTRUNC %8(s32)
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
G_BR %bb.2

...
---
# Mirror of the fpr-to-gpr case: the loop PHI result (%3) is on the fpr bank,
# its back-edge incoming value (%5) is already on fpr, and the value coming
# from bb.1 (%1, produced by G_TRUNC) is on the gpr bank. The CHECK lines
# expect copies in bb.1 that move the gpr value to an fpr16 register, and an
# fpr16 PHI.
name: test_loop_phi_gpr_to_fpr
alignment: 4
legalized: true
regBankSelected: true
selected: false
failedISel: false
tracksRegLiveness: true
liveins: []
machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
; CHECK: bb.2:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
; CHECK: B %bb.2
bb.0:
successors: %bb.1(0x80000000)

%0:gpr(s1) = G_IMPLICIT_DEF
%4:gpr(p0) = G_IMPLICIT_DEF
%8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000

bb.1:
successors: %bb.2(0x80000000)

%6:gpr(s32) = G_IMPLICIT_DEF
%7:gpr(s32) = G_SELECT %0(s1), %6, %6
%1:gpr(s16) = G_TRUNC %7(s32)

bb.2:
successors: %bb.2(0x80000000)

%3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
%5:fpr(s16) = G_FPTRUNC %8(s32)
G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
G_BR %bb.2

...