forked from OSchip/llvm-project
[MIR-Canon] Adding support for local idempotent instruction hoisting.
llvm-svn: 328915
This commit is contained in:
parent
13a0f83a05
commit
57c4f38c35
|
@ -131,7 +131,43 @@ static unsigned GetDummyVReg(const MachineFunction &MF) {
|
|||
return ~0U;
|
||||
}
|
||||
|
||||
static bool rescheduleCanonically(MachineBasicBlock *MBB) {
|
||||
static bool
|
||||
rescheduleLexographically(std::vector<MachineInstr *> instructions,
|
||||
MachineBasicBlock *MBB,
|
||||
std::function<MachineBasicBlock::iterator()> getPos) {
|
||||
|
||||
bool Changed = false;
|
||||
std::map<std::string, MachineInstr*> StringInstrMap;
|
||||
|
||||
for (auto *II : instructions) {
|
||||
std::string S;
|
||||
raw_string_ostream OS(S);
|
||||
II->print(OS);
|
||||
OS.flush();
|
||||
|
||||
// Trim the assignment, or start from the begining in the case of a store.
|
||||
const size_t i = S.find("=");
|
||||
StringInstrMap.insert({(i == std::string::npos) ? S : S.substr(i), II});
|
||||
}
|
||||
|
||||
for (auto &II : StringInstrMap) {
|
||||
|
||||
DEBUG({
|
||||
dbgs() << "Splicing ";
|
||||
II.second->dump();
|
||||
dbgs() << " right before: ";
|
||||
getPos()->dump();
|
||||
});
|
||||
|
||||
Changed = true;
|
||||
MBB->splice(getPos(), MBB, II.second);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
|
||||
MachineBasicBlock *MBB) {
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
|
@ -153,13 +189,59 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
|
|||
Instructions.push_back(&MI);
|
||||
}
|
||||
|
||||
std::vector<MachineInstr *> PseudoIdempotentInstructions;
|
||||
std::vector<unsigned> PhysRegDefs;
|
||||
for (auto *II : Instructions) {
|
||||
for (unsigned i = 1; i < II->getNumOperands(); i++) {
|
||||
MachineOperand &MO = II->getOperand(i);
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
|
||||
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
|
||||
continue;
|
||||
|
||||
if (!MO.isDef())
|
||||
continue;
|
||||
|
||||
PhysRegDefs.push_back(MO.getReg());
|
||||
}
|
||||
}
|
||||
|
||||
for (auto *II : Instructions) {
|
||||
if (II->getNumOperands() == 0)
|
||||
continue;
|
||||
if (II->mayLoadOrStore())
|
||||
continue;
|
||||
|
||||
MachineOperand &MO = II->getOperand(0);
|
||||
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
|
||||
continue;
|
||||
if (!MO.isDef())
|
||||
continue;
|
||||
|
||||
bool IsPseudoIdempotent = true;
|
||||
for (unsigned i = 1; i < II->getNumOperands(); i++) {
|
||||
|
||||
if (II->getOperand(i).isImm()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (II->getOperand(i).isReg()) {
|
||||
if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg()))
|
||||
if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
|
||||
PhysRegDefs.end()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
IsPseudoIdempotent = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (IsPseudoIdempotent) {
|
||||
PseudoIdempotentInstructions.push_back(II);
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
|
||||
|
||||
|
@ -194,9 +276,6 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
|
|||
if (DefI != BBE && UseI != BBE)
|
||||
break;
|
||||
|
||||
if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
|
||||
continue;
|
||||
|
||||
if (&*BBI == Def) {
|
||||
DefI = BBI;
|
||||
continue;
|
||||
|
@ -222,6 +301,12 @@ static bool rescheduleCanonically(MachineBasicBlock *MBB) {
|
|||
MBB->splice(UseI, MBB, DefI);
|
||||
}
|
||||
|
||||
PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
|
||||
DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
|
||||
Changed |= rescheduleLexographically(
|
||||
PseudoIdempotentInstructions, MBB,
|
||||
[&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
@ -517,7 +602,8 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
|
|||
DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
|
||||
|
||||
DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
|
||||
Changed |= rescheduleCanonically(MBB);
|
||||
unsigned IdempotentInstCount = 0;
|
||||
Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
|
||||
DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
|
||||
|
||||
std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
|
||||
|
@ -579,6 +665,31 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
|
|||
|
||||
auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
|
||||
Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
|
||||
|
||||
// Here we renumber the def vregs for the idempotent instructions from the top
|
||||
// of the MachineBasicBlock so that they are named in the order that we sorted
|
||||
// them alphabetically. Eventually we won't need SkipVRegs because we will use
|
||||
// named vregs instead.
|
||||
unsigned gap = 1;
|
||||
SkipVRegs(gap, MRI, DummyRC);
|
||||
|
||||
auto MII = MBB->begin();
|
||||
for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
|
||||
MachineInstr &MI = *MII++;
|
||||
Changed = true;
|
||||
unsigned vRegToRename = MI.getOperand(0).getReg();
|
||||
auto Rename = MRI.createVirtualRegister(MRI.getRegClass(vRegToRename));
|
||||
|
||||
std::vector<MachineOperand *> RenameMOs;
|
||||
for (auto &MO : MRI.reg_operands(vRegToRename)) {
|
||||
RenameMOs.push_back(&MO);
|
||||
}
|
||||
|
||||
for (auto *MO : RenameMOs) {
|
||||
MO->setReg(Rename);
|
||||
}
|
||||
}
|
||||
|
||||
Changed |= doDefKillClear(MBB);
|
||||
|
||||
DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -run-pass mir-canonicalizer %s | FileCheck %s
|
||||
# These idempotent instructions are sorted alphabetically (based on the text after the '=')
|
||||
# CHECK: %4353:gpr64 = MOVi64imm 4617315517961601024
|
||||
# CHECK: %4354:gpr32 = MOVi32imm 408
|
||||
# CHECK: %4355:gpr64all = IMPLICIT_DEF
|
||||
# CHECK: %4356:fpr64 = FMOVDi 20
|
||||
# CHECK: %4357:fpr64 = FMOVDi 112
|
||||
...
|
||||
---
|
||||
name: Proc8
|
||||
stack:
|
||||
- { id: 0, type: default, offset: 0, size: 4, alignment: 4,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -4, di-variable: '', di-expression: '', di-location: '' }
|
||||
- { id: 1, type: default, offset: 0, size: 8, alignment: 8,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -16, di-variable: '', di-expression: '', di-location: '' }
|
||||
- { id: 2, type: default, offset: 0, size: 8, alignment: 8,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -24, di-variable: '', di-expression: '', di-location: '' }
|
||||
- { id: 3, type: default, offset: 0, size: 8, alignment: 8,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -32, di-variable: '', di-expression: '', di-location: '' }
|
||||
- { id: 4, type: default, offset: 0, size: 8, alignment: 8,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -40, di-variable: '', di-expression: '', di-location: '' }
|
||||
- { id: 5, type: default, offset: 0, size: 8, alignment: 8,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -48, di-variable: '', di-expression: '', di-location: '' }
|
||||
- { id: 6, type: default, offset: 0, size: 8, alignment: 8,
|
||||
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
|
||||
local-offset: -56, di-variable: '', di-expression: '', di-location: '' }
|
||||
constants:
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $x0, $x1, $d0, $d1
|
||||
|
||||
%3:fpr64 = COPY $d1
|
||||
%2:fpr64 = COPY $d0
|
||||
%1:gpr64 = COPY $x1
|
||||
%0:gpr64common = COPY $x0
|
||||
STRXui %0, %stack.1, 0 :: (store 8)
|
||||
STRXui %1, %stack.2, 0 :: (store 8)
|
||||
STRDui %2, %stack.3, 0 :: (store 8)
|
||||
STRDui %3, %stack.4, 0 :: (store 8)
|
||||
|
||||
%4:fpr64 = FMOVDi 20
|
||||
%5:fpr64 = FADDDrr %2, killed %4
|
||||
STRDui %5, %stack.5, 0 :: (store 8)
|
||||
|
||||
%6:gpr32 = FCVTZSUWDr %5
|
||||
STRDroW %3, %0, killed %6, 1, 1
|
||||
|
||||
%7:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
|
||||
%8:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
|
||||
|
||||
%9:gpr32common = FCVTZSUWDr killed %8
|
||||
%10:fpr64 = LDRDroW %7, %9, 1, 1
|
||||
|
||||
%11:gpr32common = ADDWri %9, 1, 0
|
||||
STRDroW killed %10, %7, killed %11, 1, 1
|
||||
|
||||
%12:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
|
||||
%13:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
|
||||
|
||||
%14:gpr32common = FCVTZSUWDr %12
|
||||
%15:gpr32common = ADDWri killed %14, 30, 0
|
||||
STRDroW %12, killed %13, killed %15, 1, 1
|
||||
|
||||
%16:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
|
||||
STRDui killed %16, %stack.6, 0 :: (store 8)
|
||||
|
||||
%19:fpr64 = FMOVDi 112
|
||||
%46:gpr32 = MOVi32imm 408
|
||||
%43:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
|
||||
%44:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
|
||||
|
||||
%45:gpr32 = FCVTZSUWDr %43
|
||||
%47:gpr64common = SMADDLrrr killed %45, %46, killed %44
|
||||
%48:fpr64 = LDRDui %stack.6, 0 :: (dereferenceable load 8)
|
||||
|
||||
%49:gpr32 = FCVTZSUWDr killed %48
|
||||
STRDroW %43, killed %47, killed %49, 1, 1
|
||||
|
||||
%21:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
|
||||
%22:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
|
||||
|
||||
%23:gpr32 = FCVTZSUWDr killed %22
|
||||
%24:gpr32 = MOVi32imm 408
|
||||
%25:gpr64common = SMADDLrrr %23, %24, killed %21
|
||||
%26:gpr64sp = ADDXrx killed %25, %23, 51
|
||||
%27:fpr64 = LDURDi %26, -8
|
||||
%29:fpr64 = FADDDrr killed %27, %19
|
||||
STURDi killed %29, %26, -8
|
||||
|
||||
%30:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
|
||||
%31:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
|
||||
|
||||
%32:gpr32common = FCVTZSUWDr killed %31
|
||||
%34:gpr64all = IMPLICIT_DEF
|
||||
%33:gpr64 = INSERT_SUBREG %34, %32, %subreg.sub_32
|
||||
%35:gpr64 = SBFMXri killed %33, 61, 31
|
||||
%36:fpr64 = LDRDroX killed %30, %35, 0, 0
|
||||
%37:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
|
||||
|
||||
%38:gpr32common = ADDWri %32, 20, 0
|
||||
%39:gpr64common = SMADDLrrr killed %38, %24, killed %37
|
||||
STRDroX killed %36, killed %39, %35, 0, 0
|
||||
|
||||
%40:gpr64 = MOVi64imm 4617315517961601024
|
||||
|
||||
%42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8)
|
||||
$w0 = COPY %42
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
Loading…
Reference in New Issue