[AArch64][GlobalISel] Infer whether G_PHI is going to be a FPR in regbankselect

Some instructions (G_LOAD, G_SELECT, G_UNMERGE_VALUES) check if their uses
will define/use FPRs (using `onlyUsesFP` and `onlyDefinesFP`).

However, the register bank of a use isn't necessarily known yet at the point
where an instruction asks this question.

Teach `hasFPConstraints` to look at the instructions feeding into a G_PHI when
its destination bank is unknown. If any of those instructions only defines FPRs
(per `onlyDefinesFP`), assume the entire G_PHI will also be assigned a FPR.

Since a phi can have many inputs, and those inputs can in turn be phis,
restrict the search depth to a very low number (`MaxFPRSearchDepth`, set to 2).

Also improve the docs for `hasFPConstraints` and friends a little.

This is a 0.3% code size improvement on CTMark/Bullet at -O3, and a 0.2% code
size improvement on CTMark/pairlocalalign at -O3.

Differential Revision: https://reviews.llvm.org/D88177
This commit is contained in:
Jessica Paquette 2020-09-23 11:28:10 -07:00
parent 745abbbb85
commit 9d7ec46f57
3 changed files with 299 additions and 18 deletions

View File

@ -466,9 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
bool AArch64RegisterBankInfo::hasFPConstraints(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
unsigned Op = MI.getOpcode();
// Do we have an explicit floating point instruction?
@ -480,14 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(
if (Op != TargetOpcode::COPY && !MI.isPHI())
return false;
// MI is copy-like. Return true if it outputs an FPR.
return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
&AArch64::FPRRegBank;
// Check if we already know the register bank.
auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
if (RB == &AArch64::FPRRegBank)
return true;
if (RB == &AArch64::GPRRegBank)
return false;
// We don't know anything.
//
// If we have a phi, we may be able to infer that it will be assigned a FPR
// based off of its inputs.
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
return false;
return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
return Op.isReg() &&
onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
});
}
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
const TargetRegisterInfo &TRI,
unsigned Depth) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
@ -496,12 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
default:
break;
}
return hasFPConstraints(MI, MRI, TRI);
return hasFPConstraints(MI, MRI, TRI, Depth);
}
bool AArch64RegisterBankInfo::onlyDefinesFP(
const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
switch (MI.getOpcode()) {
case AArch64::G_DUP:
case TargetOpcode::G_SITOFP:
@ -512,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(
default:
break;
}
return hasFPConstraints(MI, MRI, TRI);
return hasFPConstraints(MI, MRI, TRI, Depth);
}
const RegisterBankInfo::InstructionMapping &

View File

@ -114,17 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
const InstructionMapping &
getSameKindOfOperandsMapping(const MachineInstr &MI) const;
/// Returns true if the output of \p MI must be stored on a FPR register.
/// Maximum recursion depth for hasFPConstraints.
const unsigned MaxFPRSearchDepth = 2;
/// \returns true if \p MI only uses and defines FPRs.
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
/// Returns true if the source registers of \p MI must all be FPRs.
/// \returns true if \p MI only uses FPRs.
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
/// Returns true if the destination register of \p MI must be a FPR.
/// \returns true if \p MI only defines FPRs.
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);

View File

@ -101,3 +101,263 @@ body: |
%4:_(s32) = G_SITOFP %2
%6:_(s32) = G_SELECT %1(s1), %3, %4
%8:_(s32) = G_FPTOSI %6
...
---
# A G_LOAD whose only user is a G_PHI. The G_PHI's other incoming value is a
# copy from $s0, which is assigned to the FPR bank. The CHECK lines expect both
# the G_LOAD and the G_PHI to be assigned to the FPR bank as well.
name: load_used_by_phi_fpr
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: load_used_by_phi_fpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
; CHECK: %cond_wide:gpr(s32) = COPY $w0
; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
; CHECK: %fpr_copy:fpr(s32) = COPY $s0
; CHECK: %ptr:gpr(p0) = COPY $x0
; CHECK: G_BRCOND %cond(s1), %bb.1
; CHECK: G_BR %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load 4)
; CHECK: G_BR %bb.2
; CHECK: bb.2:
; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1
; CHECK: $s0 = COPY %phi(s32)
; CHECK: RET_ReallyLR implicit $s0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $x0, $s0, $s1, $w0, $w1
%cond_wide:_(s32) = COPY $w0
%cond:_(s1) = G_TRUNC %cond_wide(s32)
%fpr_copy:_(s32) = COPY $s0
%ptr:_(p0) = COPY $x0
G_BRCOND %cond(s1), %bb.1
G_BR %bb.2
bb.1:
successors: %bb.2
%load:_(s32) = G_LOAD %ptr(p0) :: (load 4)
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1
$s0 = COPY %phi(s32)
RET_ReallyLR implicit $s0
...
---
# A G_LOAD whose only user is a G_PHI. The G_PHI's other incoming value is a
# copy from $w1, which is assigned to the GPR bank. The CHECK lines expect both
# the G_LOAD and the G_PHI to stay on the GPR bank, even though the G_PHI
# result is later copied into $s0.
name: load_used_by_phi_gpr
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: load_used_by_phi_gpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
; CHECK: %cond_wide:gpr(s32) = COPY $w0
; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
; CHECK: %gpr_copy:gpr(s32) = COPY $w1
; CHECK: %ptr:gpr(p0) = COPY $x0
; CHECK: G_BRCOND %cond(s1), %bb.1
; CHECK: G_BR %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
; CHECK: G_BR %bb.2
; CHECK: bb.2:
; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
; CHECK: $s0 = COPY %phi(s32)
; CHECK: RET_ReallyLR implicit $s0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $x0, $s0, $s1, $w0, $w1
%cond_wide:_(s32) = COPY $w0
%cond:_(s1) = G_TRUNC %cond_wide(s32)
%gpr_copy:_(s32) = COPY $w1
%ptr:_(p0) = COPY $x0
G_BRCOND %cond(s1), %bb.1
G_BR %bb.2
bb.1:
successors: %bb.2
%load:_(s32) = G_LOAD %ptr(p0) :: (load 4)
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
$s0 = COPY %phi(s32)
RET_ReallyLR implicit $s0
...
---
# A G_SELECT with one FPR input and one GPR input, whose only user is a G_PHI.
# The G_PHI's other incoming value is the FPR copy. The CHECK lines expect the
# G_SELECT and the G_PHI to be assigned to the FPR bank, with the GPR input of
# the G_SELECT copied across to an FPR. Note that the G_PHI result is copied
# into $w0 here.
name: select_used_by_phi_fpr
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: select_used_by_phi_fpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $s0, $s1, $w0, $w1
; CHECK: %cond_wide:gpr(s32) = COPY $w0
; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
; CHECK: %fpr_copy:fpr(s32) = COPY $s0
; CHECK: %gpr_copy:gpr(s32) = COPY $w1
; CHECK: G_BRCOND %cond(s1), %bb.1
; CHECK: G_BR %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %gpr_copy(s32)
; CHECK: %select:fpr(s32) = G_SELECT %cond(s1), %fpr_copy, [[COPY]]
; CHECK: G_BR %bb.2
; CHECK: bb.2:
; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %select(s32), %bb.1
; CHECK: $w0 = COPY %phi(s32)
; CHECK: RET_ReallyLR implicit $w0
; The G_SELECT and G_PHI should end up with the same register bank.
;
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $s0, $s1, $w0, $w1
%cond_wide:_(s32) = COPY $w0
%cond:_(s1) = G_TRUNC %cond_wide(s32)
%fpr_copy:_(s32) = COPY $s0
%gpr_copy:_(s32) = COPY $w1
G_BRCOND %cond(s1), %bb.1
G_BR %bb.2
bb.1:
successors: %bb.2
%select:_(s32) = G_SELECT %cond(s1), %fpr_copy, %gpr_copy
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %select(s32), %bb.1
$w0 = COPY %phi(s32)
RET_ReallyLR implicit $w0
...
---
# A G_SELECT with one FPR input and one GPR input, whose only user is a G_PHI.
# The G_PHI's other incoming value is the GPR copy. The CHECK lines expect the
# G_SELECT and the G_PHI to be assigned to the GPR bank, with the FPR input of
# the G_SELECT copied across to a GPR. Note that the G_PHI result is copied
# into $s0 here.
name: select_used_by_phi_gpr
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: select_used_by_phi_gpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $s0, $s1, $w0, $w1
; CHECK: %cond_wide:gpr(s32) = COPY $w0
; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
; CHECK: %fpr_copy:fpr(s32) = COPY $s0
; CHECK: %gpr_copy:gpr(s32) = COPY $w1
; CHECK: G_BRCOND %cond(s1), %bb.1
; CHECK: G_BR %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %fpr_copy(s32)
; CHECK: %select:gpr(s32) = G_SELECT %cond(s1), [[COPY]], %gpr_copy
; CHECK: G_BR %bb.2
; CHECK: bb.2:
; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %select(s32), %bb.1
; CHECK: $s0 = COPY %phi(s32)
; CHECK: RET_ReallyLR implicit $s0
; The G_SELECT and G_PHI should end up with the same register bank.
;
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $s0, $s1, $w0, $w1
%cond_wide:_(s32) = COPY $w0
%cond:_(s1) = G_TRUNC %cond_wide(s32)
%fpr_copy:_(s32) = COPY $s0
%gpr_copy:_(s32) = COPY $w1
G_BRCOND %cond(s1), %bb.1
G_BR %bb.2
bb.1:
successors: %bb.2
%select:_(s32) = G_SELECT %cond(s1), %fpr_copy, %gpr_copy
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %select(s32), %bb.1
$s0 = COPY %phi(s32)
RET_ReallyLR implicit $s0
...
---
# A G_UNMERGE_VALUES of a GPR source whose first result feeds a G_PHI. The
# G_PHI's other incoming value is a copy from $s0 (FPR). The CHECK lines expect
# the unmerge results and the G_PHI to be assigned to the FPR bank, with the
# unmerge source copied across to an FPR first.
name: unmerge_used_by_phi_fpr
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: unmerge_used_by_phi_fpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
; CHECK: %cond_wide:gpr(s32) = COPY $w0
; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
; CHECK: %fpr_copy:fpr(s32) = COPY $s0
; CHECK: %unmerge_src:gpr(s64) = COPY $x0
; CHECK: G_BRCOND %cond(s1), %bb.1
; CHECK: G_BR %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %unmerge_src(s64)
; CHECK: %unmerge_1:fpr(s32), %unmerge_2:fpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: G_BR %bb.2
; CHECK: bb.2:
; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
; CHECK: $s0 = COPY %phi(s32)
; CHECK: RET_ReallyLR implicit $s0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $x0, $s0, $s1, $w0, $w1
%cond_wide:_(s32) = COPY $w0
%cond:_(s1) = G_TRUNC %cond_wide(s32)
%fpr_copy:_(s32) = COPY $s0
%unmerge_src:_(s64) = COPY $x0
G_BRCOND %cond(s1), %bb.1
G_BR %bb.2
bb.1:
successors: %bb.2
%unmerge_1:_(s32), %unmerge_2:_(s32) = G_UNMERGE_VALUES %unmerge_src(s64)
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
$s0 = COPY %phi(s32)
RET_ReallyLR implicit $s0
...
---
# A G_UNMERGE_VALUES of a GPR source whose first result feeds a G_PHI. The
# G_PHI's other incoming value is a copy from $w1 (GPR). The CHECK lines expect
# the unmerge results and the G_PHI to stay on the GPR bank with no cross-bank
# copy of the unmerge source, even though the G_PHI result is later copied
# into $s0.
name: unmerge_used_by_phi_gpr
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: unmerge_used_by_phi_gpr
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
; CHECK: %cond_wide:gpr(s32) = COPY $w0
; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
; CHECK: %gpr_copy:gpr(s32) = COPY $w1
; CHECK: %unmerge_src:gpr(s64) = COPY $x0
; CHECK: G_BRCOND %cond(s1), %bb.1
; CHECK: G_BR %bb.2
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: %unmerge_1:gpr(s32), %unmerge_2:gpr(s32) = G_UNMERGE_VALUES %unmerge_src(s64)
; CHECK: G_BR %bb.2
; CHECK: bb.2:
; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
; CHECK: $s0 = COPY %phi(s32)
; CHECK: RET_ReallyLR implicit $s0
bb.0:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
liveins: $x0, $s0, $s1, $w0, $w1
%cond_wide:_(s32) = COPY $w0
%cond:_(s1) = G_TRUNC %cond_wide(s32)
%gpr_copy:_(s32) = COPY $w1
%unmerge_src:_(s64) = COPY $x0
G_BRCOND %cond(s1), %bb.1
G_BR %bb.2
bb.1:
successors: %bb.2
%unmerge_1:_(s32), %unmerge_2:_(s32) = G_UNMERGE_VALUES %unmerge_src(s64)
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
$s0 = COPY %phi(s32)
RET_ReallyLR implicit $s0