AArch64: Use DeadRegisterDefinitionsPass before regalloc.
Doing this before register allocation reduces register pressure as we do not
even have to allocate a register for those dead definitions.

Differential Revision: https://reviews.llvm.org/D26111

llvm-svn: 287076
commit 3d51cf0a2c
parent 6b335d1948
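As a quick illustration (a sketch, not part of the commit): on AArch64 a
compare is a SUBS whose result definition is dead, since only the NZCV flags
are consumed. If that dead def still occupies a virtual register at register
allocation time, it costs a physical register for no benefit:

    subs x8, x0, x1        // x8 is defined but never read again
    b.eq .LBB0_2

Rewriting the dead def to the zero register before regalloc removes that
pressure entirely:

    subs xzr, x0, x1       // printed as "cmp x0, x1"; no GPR is clobbered
    b.eq .LBB0_2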
llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitions.cpp

@@ -17,9 +17,11 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-dead-defs"
@@ -32,8 +34,9 @@ namespace {
 class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
 private:
   const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  const TargetInstrInfo *TII;
   bool Changed;
-  bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
   void processMachineBasicBlock(MachineBasicBlock &MBB);
 public:
   static char ID; // Pass identification, replacement for typeid.
@@ -44,11 +47,6 @@ public:
 
   bool runOnMachineFunction(MachineFunction &F) override;
 
-  MachineFunctionProperties getRequiredProperties() const override {
-    return MachineFunctionProperties().set(
-        MachineFunctionProperties::Property::NoVRegs);
-  }
-
   StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -62,15 +60,6 @@ char AArch64DeadRegisterDefinitions::ID = 0;
 INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
                 AARCH64_DEAD_REG_DEF_NAME, false, false)
 
-bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
-    unsigned Reg, const MachineInstr &MI) {
-  for (const MachineOperand &MO : MI.implicit_operands())
-    if (MO.isReg() && MO.isDef())
-      if (TRI->regsOverlap(Reg, MO.getReg()))
-        return true;
-  return false;
-}
-
 static bool usesFrameIndex(const MachineInstr &MI) {
   for (const MachineOperand &MO : MI.uses())
     if (MO.isFI())
@@ -80,6 +69,7 @@ static bool usesFrameIndex(const MachineInstr &MI) {
 
 void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
     MachineBasicBlock &MBB) {
+  const MachineFunction &MF = *MBB.getParent();
   for (MachineInstr &MI : MBB) {
     if (usesFrameIndex(MI)) {
       // We need to skip this instruction because while it appears to have a
@@ -97,7 +87,13 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
     const MCInstrDesc &Desc = MI.getDesc();
     for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
       MachineOperand &MO = MI.getOperand(I);
-      if (!MO.isReg() || !MO.isDead() || !MO.isDef())
+      if (!MO.isReg() || !MO.isDef())
         continue;
+      // We should not have any relevant physreg defs that are replacable by
+      // zero before register allocation. So we just check for dead vreg defs.
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+          (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
+        continue;
       assert(!MO.isImplicit() && "Unexpected implicit def!");
       DEBUG(dbgs() << "  Dead def operand #" << I << " in:\n    ";
@@ -107,28 +103,22 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
         DEBUG(dbgs() << "  Ignoring, def is tied operand.\n");
         continue;
       }
-      // Don't change the register if there's an implicit def of a subreg or
-      // superreg.
-      if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
-        DEBUG(dbgs() << "  Ignoring, implicitly defines overlap reg.\n");
-        continue;
-      }
-      // Make sure the instruction take a register class that contains
-      // the zero register and replace it if so.
+      const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
       unsigned NewReg;
-      switch (Desc.OpInfo[I].RegClass) {
-      default:
+      if (RC == nullptr) {
         DEBUG(dbgs() << "  Ignoring, register is not a GPR.\n");
         continue;
-      case AArch64::GPR32RegClassID:
+      } else if (RC->contains(AArch64::WZR))
         NewReg = AArch64::WZR;
-        break;
-      case AArch64::GPR64RegClassID:
+      else if (RC->contains(AArch64::XZR))
         NewReg = AArch64::XZR;
-        break;
+      else {
+        DEBUG(dbgs() << "  Ignoring, register is not a GPR.\n");
+        continue;
       }
       DEBUG(dbgs() << "  Replacing with zero register. New:\n      ");
       MO.setReg(NewReg);
+      MO.setIsDead();
       DEBUG(MI.print(dbgs()));
       ++NumDeadDefsReplaced;
       Changed = true;
@@ -145,6 +135,8 @@ bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   TRI = MF.getSubtarget().getRegisterInfo();
+  TII = MF.getSubtarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
   DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
   Changed = false;
   for (auto &MBB : MF)
llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

@@ -434,6 +434,10 @@ bool AArch64PassConfig::addILPOpts() {
 }
 
 void AArch64PassConfig::addPreRegAlloc() {
+  // Change dead register definitions to refer to the zero register.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+    addPass(createAArch64DeadRegisterDefinitions());
+
   // Use AdvSIMD scalar instructions whenever profitable.
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
     addPass(createAArch64AdvSIMDScalar());
@@ -448,9 +452,6 @@ void AArch64PassConfig::addPostRegAlloc() {
   if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
     addPass(createAArch64RedundantCopyEliminationPass());
 
-  // Change dead register definitions to refer to the zero register.
-  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
-    addPass(createAArch64DeadRegisterDefinitions());
   if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
     // Improve performance for some FP/SIMD code for A57.
     addPass(createAArch64A57FPLoadBalancing());
llvm/test/CodeGen/AArch64/… (new test file)

@@ -0,0 +1,91 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "aarch64--"
+
+declare void @begin()
+declare void @end()
+
+; Test that we use the zero register before regalloc and do not unnecessarily
+; clobber a register with the SUBS (cmp) instruction.
+; CHECK-LABEL: func:
+define void @func(i64* %addr) {
+; We should not see any spills or reloads between begin and end
+; CHECK: bl begin
+; CHECK-NOT: str{{.*}}sp
+; CHECK-NOT: Folded Spill
+; CHECK-NOT: ldr{{.*}}sp
+; CHECK-NOT: Folded Reload
+  call void @begin()
+  %v0 = load volatile i64, i64* %addr
+  %v1 = load volatile i64, i64* %addr
+  %v2 = load volatile i64, i64* %addr
+  %v3 = load volatile i64, i64* %addr
+  %v4 = load volatile i64, i64* %addr
+  %v5 = load volatile i64, i64* %addr
+  %v6 = load volatile i64, i64* %addr
+  %v7 = load volatile i64, i64* %addr
+  %v8 = load volatile i64, i64* %addr
+  %v9 = load volatile i64, i64* %addr
+  %v10 = load volatile i64, i64* %addr
+  %v11 = load volatile i64, i64* %addr
+  %v12 = load volatile i64, i64* %addr
+  %v13 = load volatile i64, i64* %addr
+  %v14 = load volatile i64, i64* %addr
+  %v15 = load volatile i64, i64* %addr
+  %v16 = load volatile i64, i64* %addr
+  %v17 = load volatile i64, i64* %addr
+  %v18 = load volatile i64, i64* %addr
+  %v19 = load volatile i64, i64* %addr
+  %v20 = load volatile i64, i64* %addr
+  %v21 = load volatile i64, i64* %addr
+  %v22 = load volatile i64, i64* %addr
+  %v23 = load volatile i64, i64* %addr
+  %v24 = load volatile i64, i64* %addr
+  %v25 = load volatile i64, i64* %addr
+  %v26 = load volatile i64, i64* %addr
+  %v27 = load volatile i64, i64* %addr
+  %v28 = load volatile i64, i64* %addr
+  %v29 = load volatile i64, i64* %addr
+
+  %c = icmp eq i64 %v0, %v1
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  store volatile i64 %v2, i64* %addr
+  br label %if.end
+
+if.end:
+  store volatile i64 %v0, i64* %addr
+  store volatile i64 %v1, i64* %addr
+  store volatile i64 %v2, i64* %addr
+  store volatile i64 %v3, i64* %addr
+  store volatile i64 %v4, i64* %addr
+  store volatile i64 %v5, i64* %addr
+  store volatile i64 %v6, i64* %addr
+  store volatile i64 %v7, i64* %addr
+  store volatile i64 %v8, i64* %addr
+  store volatile i64 %v9, i64* %addr
+  store volatile i64 %v10, i64* %addr
+  store volatile i64 %v11, i64* %addr
+  store volatile i64 %v12, i64* %addr
+  store volatile i64 %v13, i64* %addr
+  store volatile i64 %v14, i64* %addr
+  store volatile i64 %v15, i64* %addr
+  store volatile i64 %v16, i64* %addr
+  store volatile i64 %v17, i64* %addr
+  store volatile i64 %v18, i64* %addr
+  store volatile i64 %v19, i64* %addr
+  store volatile i64 %v20, i64* %addr
+  store volatile i64 %v21, i64* %addr
+  store volatile i64 %v22, i64* %addr
+  store volatile i64 %v23, i64* %addr
+  store volatile i64 %v24, i64* %addr
+  store volatile i64 %v25, i64* %addr
+  store volatile i64 %v26, i64* %addr
+  store volatile i64 %v27, i64* %addr
+  store volatile i64 %v28, i64* %addr
+  store volatile i64 %v29, i64* %addr
+; CHECK: bl end
+  call void @end()
+
+  ret void
+}
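To watch the pass run on a test like the one above (a hypothetical invocation,
not part of the commit; -debug-only requires an assertions-enabled build of
llc, and "aarch64-dead-defs" is the DEBUG_TYPE from the pass source):

    llc -o /dev/null -debug-only=aarch64-dead-defs <this-test>.ll

The output should contain the strings emitted by the DEBUG statements in the
pass: "***** AArch64DeadRegisterDefinitions *****" followed by a
"Dead def operand #..." / "Replacing with zero register." pair for each
rewritten definition.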