AArch64: Use DeadRegisterDefinitionsPass before regalloc.

Doing this before register allocation reduces register pressure as we do
not even have to allocate a register for those dead definitions.

Differential Revision: https://reviews.llvm.org/D26111

llvm-svn: 287076
This commit is contained in:
Matthias Braun 2016-11-16 03:38:27 +00:00
parent 6b335d1948
commit 3d51cf0a2c
3 changed files with 117 additions and 33 deletions

View File

@@ -17,9 +17,11 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
@@ -32,8 +34,9 @@ namespace {
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
bool Changed;
bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
void processMachineBasicBlock(MachineBasicBlock &MBB);
public:
static char ID; // Pass identification, replacement for typeid.
@@ -44,11 +47,6 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -62,15 +60,6 @@ char AArch64DeadRegisterDefinitions::ID = 0;
INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
AARCH64_DEAD_REG_DEF_NAME, false, false)
bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
    unsigned Reg, const MachineInstr &MI) {
  // Return true if MI carries an implicit def of any register that overlaps
  // \p Reg (TRI->regsOverlap also covers sub-/super-register aliasing).
  for (const MachineOperand &Op : MI.implicit_operands()) {
    // Only register operands that are definitions are of interest here;
    // implicit uses and non-register operands cannot clobber Reg.
    if (!Op.isReg() || !Op.isDef())
      continue;
    if (TRI->regsOverlap(Reg, Op.getReg()))
      return true;
  }
  return false;
}
static bool usesFrameIndex(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.uses())
if (MO.isFI())
@@ -80,6 +69,7 @@ static bool usesFrameIndex(const MachineInstr &MI) {
void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
for (MachineInstr &MI : MBB) {
if (usesFrameIndex(MI)) {
// We need to skip this instruction because while it appears to have a
@@ -97,7 +87,13 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
const MCInstrDesc &Desc = MI.getDesc();
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDead() || !MO.isDef())
if (!MO.isReg() || !MO.isDef())
continue;
// We should not have any relevant physreg defs that are replaceable by
// zero before register allocation. So we just check for dead vreg defs.
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
(!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
continue;
assert(!MO.isImplicit() && "Unexpected implicit def!");
DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
@@ -107,28 +103,22 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
continue;
}
// Don't change the register if there's an implicit def of a subreg or
// superreg.
if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
DEBUG(dbgs() << " Ignoring, implicitly defines overlap reg.\n");
continue;
}
// Make sure the instruction takes a register class that contains
// the zero register and replace it if so.
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
unsigned NewReg;
switch (Desc.OpInfo[I].RegClass) {
default:
if (RC == nullptr) {
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
case AArch64::GPR32RegClassID:
} else if (RC->contains(AArch64::WZR))
NewReg = AArch64::WZR;
break;
case AArch64::GPR64RegClassID:
else if (RC->contains(AArch64::XZR))
NewReg = AArch64::XZR;
break;
else {
DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
continue;
}
DEBUG(dbgs() << " Replacing with zero register. New:\n ");
MO.setReg(NewReg);
MO.setIsDead();
DEBUG(MI.print(dbgs()));
++NumDeadDefsReplaced;
Changed = true;
@@ -145,6 +135,8 @@ bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
return false;
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
Changed = false;
for (auto &MBB : MF)

View File

@@ -434,6 +434,10 @@ bool AArch64PassConfig::addILPOpts() {
}
void AArch64PassConfig::addPreRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
// Use AdvSIMD scalar instructions whenever profitable.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
@@ -448,9 +452,6 @@ void AArch64PassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createAArch64RedundantCopyEliminationPass());
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());

View File

@@ -0,0 +1,91 @@
; RUN: llc -o - %s | FileCheck %s
target triple = "aarch64--"
declare void @begin()
declare void @end()
; Test that we use the zero register before regalloc and do not unnecessarily
; clobber a register with the SUBS (cmp) instruction.
; CHECK-LABEL: func:
define void @func(i64* %addr) {
; We should not see any spills or reloads between begin and end
; CHECK: bl begin
; CHECK-NOT: str{{.*}}sp
; CHECK-NOT: Folded Spill
; CHECK-NOT: ldr{{.*}}sp
; CHECK-NOT: Folded Reload
call void @begin()
; Thirty volatile loads create values %v0..%v29 that are all still needed for
; the volatile stores after the branch, so they are simultaneously live and
; register pressure is near the limit of the allocatable GPRs.
; NOTE(review): the volatile qualifier prevents the loads/stores from being
; coalesced or reordered, which keeps all thirty values live at once.
%v0 = load volatile i64, i64* %addr
%v1 = load volatile i64, i64* %addr
%v2 = load volatile i64, i64* %addr
%v3 = load volatile i64, i64* %addr
%v4 = load volatile i64, i64* %addr
%v5 = load volatile i64, i64* %addr
%v6 = load volatile i64, i64* %addr
%v7 = load volatile i64, i64* %addr
%v8 = load volatile i64, i64* %addr
%v9 = load volatile i64, i64* %addr
%v10 = load volatile i64, i64* %addr
%v11 = load volatile i64, i64* %addr
%v12 = load volatile i64, i64* %addr
%v13 = load volatile i64, i64* %addr
%v14 = load volatile i64, i64* %addr
%v15 = load volatile i64, i64* %addr
%v16 = load volatile i64, i64* %addr
%v17 = load volatile i64, i64* %addr
%v18 = load volatile i64, i64* %addr
%v19 = load volatile i64, i64* %addr
%v20 = load volatile i64, i64* %addr
%v21 = load volatile i64, i64* %addr
%v22 = load volatile i64, i64* %addr
%v23 = load volatile i64, i64* %addr
%v24 = load volatile i64, i64* %addr
%v25 = load volatile i64, i64* %addr
%v26 = load volatile i64, i64* %addr
%v27 = load volatile i64, i64* %addr
%v28 = load volatile i64, i64* %addr
%v29 = load volatile i64, i64* %addr
; The compare result feeds only the branch; per the file header comment above,
; the point is that the SUBS (cmp) should write the zero register rather than
; clobbering a GPR that would force a spill at this pressure level.
%c = icmp eq i64 %v0, %v1
br i1 %c, label %if.then, label %if.end
if.then:
store volatile i64 %v2, i64* %addr
br label %if.end
if.end:
; Every loaded value is stored back, keeping %v0..%v29 live across the branch.
store volatile i64 %v0, i64* %addr
store volatile i64 %v1, i64* %addr
store volatile i64 %v2, i64* %addr
store volatile i64 %v3, i64* %addr
store volatile i64 %v4, i64* %addr
store volatile i64 %v5, i64* %addr
store volatile i64 %v6, i64* %addr
store volatile i64 %v7, i64* %addr
store volatile i64 %v8, i64* %addr
store volatile i64 %v9, i64* %addr
store volatile i64 %v10, i64* %addr
store volatile i64 %v11, i64* %addr
store volatile i64 %v12, i64* %addr
store volatile i64 %v13, i64* %addr
store volatile i64 %v14, i64* %addr
store volatile i64 %v15, i64* %addr
store volatile i64 %v16, i64* %addr
store volatile i64 %v17, i64* %addr
store volatile i64 %v18, i64* %addr
store volatile i64 %v19, i64* %addr
store volatile i64 %v20, i64* %addr
store volatile i64 %v21, i64* %addr
store volatile i64 %v22, i64* %addr
store volatile i64 %v23, i64* %addr
store volatile i64 %v24, i64* %addr
store volatile i64 %v25, i64* %addr
store volatile i64 %v26, i64* %addr
store volatile i64 %v27, i64* %addr
store volatile i64 %v28, i64* %addr
store volatile i64 %v29, i64* %addr
; CHECK: bl end
call void @end()
ret void
}