[ARM][LowOverheadLoops] Revert remaining pseudos

ARMLowOverheadLoops would assert a failure if it did not find all the
pseudo instructions that comprise the hardware loop. Instead of doing
this, iterate through all the instructions of the function and revert
any remaining pseudo instructions that haven't been converted.

Differential Revision: https://reviews.llvm.org/D65080

llvm-svn: 366691
This commit is contained in:
Sam Parker 2019-07-22 14:16:40 +00:00
parent 25569296c6
commit 4379a40088
2 changed files with 226 additions and 12 deletions

View File

@ -54,6 +54,8 @@ namespace {
bool ProcessLoop(MachineLoop *ML);
bool RevertNonLoops(MachineFunction &MF);
void RevertWhile(MachineInstr *MI) const;
void RevertLoopDec(MachineInstr *MI) const;
@ -98,9 +100,15 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
if (!ML->getParentLoop())
Changed |= ProcessLoop(ML);
}
Changed |= RevertNonLoops(MF);
return Changed;
}
static bool IsLoopStart(MachineInstr &MI) {
return MI.getOpcode() == ARM::t2DoLoopStart ||
MI.getOpcode() == ARM::t2WhileLoopStart;
}
bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
bool Changed = false;
@ -111,15 +119,10 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
auto IsLoopStart = [](MachineInstr &MI) {
return MI.getOpcode() == ARM::t2DoLoopStart ||
MI.getOpcode() == ARM::t2WhileLoopStart;
};
// Search the given block for a loop start instruction. If one isn't found,
// and there's only one predecessor block, search that one too.
std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
[&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
[&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
for (auto &MI : *MBB) {
if (IsLoopStart(MI))
return &MI;
@ -165,6 +168,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
Dec = &MI;
else if (MI.getOpcode() == ARM::t2LoopEnd)
End = &MI;
else if (IsLoopStart(MI))
Start = &MI;
else if (MI.getDesc().isCall())
// TODO: Though the call will require LE to execute again, does this
// mean we should revert? Always executing LE hopefully should be
@ -190,11 +195,16 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
break;
}
LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
if (!Start && !Dec && !End) {
LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
return Changed;
} if (!(Start && Dec && End)) {
report_fatal_error("Failed to find all loop components");
} else if (!(Start && Dec && End)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
return false;
}
if (!End->getOperand(1).isMBB() ||
@ -216,10 +226,6 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
Revert = true;
}
LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
<< " - Found Loop Dec: " << *Dec
<< " - Found Loop End: " << *End);
Expand(ML, Start, Dec, End, Revert);
return true;
}
@ -379,6 +385,44 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
}
}
bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
bool Changed = false;
for (auto &MBB : MF) {
SmallVector<MachineInstr*, 4> Starts;
SmallVector<MachineInstr*, 4> Decs;
SmallVector<MachineInstr*, 4> Ends;
for (auto &I : MBB) {
if (IsLoopStart(I))
Starts.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopDec)
Decs.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopEnd)
Ends.push_back(&I);
}
if (Starts.empty() && Decs.empty() && Ends.empty())
continue;
Changed = true;
for (auto *Start : Starts) {
if (Start->getOpcode() == ARM::t2WhileLoopStart)
RevertWhile(Start);
else
Start->eraseFromParent();
}
for (auto *Dec : Decs)
RevertLoopDec(Dec);
for (auto *End : Ends)
RevertLoopEnd(End);
}
return Changed;
}
FunctionPass *llvm::createARMLowOverheadLoopsPass() {
return new ARMLowOverheadLoops();
}

View File

@ -0,0 +1,170 @@
# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s --verify-machineinstrs -o - | FileCheck %s
# CHECK: name: non_loop
# CHECK: bb.0.entry:
# CHECK: tBcc %bb.2, 3
# CHECK: tB %bb.1, 14
# CHECK: bb.1.not.preheader:
# CHECK: t2CMPri $lr, 0, 14
# CHECK: t2Bcc %bb.3, 0
# CHECK: tB %bb.2
# CHECK: bb.2.while.body:
# CHECK: t2CMPri $lr, 0, 14
# CHECK: t2Bcc %bb.2, 1
# CHECK: tB %bb.3
# CHECK: bb.3.while.end:
--- |
define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) {
entry:
%cmp = icmp ugt i32 %N, 2
br i1 %cmp, label %not.preheader, label %while.body
not.preheader: ; preds = %entry
%test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
br i1 %test, label %while.body, label %while.end
while.body: ; preds = %while.body, %not.preheader, %entry
%a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %entry ], [ %a, %not.preheader ]
%b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %entry ], [ %b, %not.preheader ]
%count = phi i32 [ %count.next, %while.body ], [ %N, %entry ], [ %N, %not.preheader ]
%incdec.ptr = getelementptr inbounds i16, i16* %b.addr.05, i32 1
%load = load i16, i16* %b.addr.05, align 2
%incdec.ptr1 = getelementptr inbounds i16, i16* %a.addr.06, i32 1
store i16 %load, i16* %a.addr.06, align 2
%count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1)
%cmp1 = icmp ne i32 %count.next, 0
br i1 %cmp1, label %while.body, label %while.end
while.end: ; preds = %while.body, %not.preheader
ret void
}
declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
attributes #0 = { noduplicate nounwind }
attributes #1 = { nounwind }
...
---
name: non_loop
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: false
hasWinCFI: false
registers: []
liveins:
- { reg: '$r0', virtual-reg: '' }
- { reg: '$r1', virtual-reg: '' }
- { reg: '$r2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 32
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 3, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 4, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 5, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 6, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 7, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.1(0x40000000), %bb.2(0x40000000)
frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
frame-setup CFI_INSTRUCTION def_cfa_offset 8
frame-setup CFI_INSTRUCTION offset $lr, -4
frame-setup CFI_INSTRUCTION offset $r7, -8
$sp = frame-setup tSUBspi $sp, 6, 14, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 32
tCMPi8 renamable $r2, 3, 14, $noreg, implicit-def $cpsr
$r3 = tMOVr $r0, 14, $noreg
$r12 = tMOVr $r1, 14, $noreg
$lr = tMOVr $r2, 14, $noreg
tSTRspi killed $r2, $sp, 5, 14, $noreg :: (store 4 into %stack.0)
tSTRspi killed $r1, $sp, 4, 14, $noreg :: (store 4 into %stack.1)
tSTRspi killed $r0, $sp, 3, 14, $noreg :: (store 4 into %stack.2)
tSTRspi killed $r3, $sp, 2, 14, $noreg :: (store 4 into %stack.3)
t2STRi12 killed $r12, $sp, 4, 14, $noreg :: (store 4 into %stack.4)
t2STRi12 killed $lr, $sp, 0, 14, $noreg :: (store 4 into %stack.5)
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1.not.preheader:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
$r0 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.2)
$r1 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.1)
$r2 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.0)
$r3 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.0)
tSTRspi killed $r0, $sp, 2, 14, $noreg :: (store 4 into %stack.3)
tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.4)
tSTRspi killed $r2, $sp, 0, 14, $noreg :: (store 4 into %stack.5)
t2WhileLoopStart killed renamable $r3, %bb.3
tB %bb.2, 14, $noreg
bb.2.while.body:
successors: %bb.2(0x40000000), %bb.3(0x40000000)
$r0 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.5)
$r1 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.4)
$r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.3)
renamable $r3, renamable $r1 = t2LDRH_POST renamable $r1, 2, 14, $noreg :: (load 2 from %ir.b.addr.05)
early-clobber renamable $r2 = t2STRH_POST killed renamable $r3, renamable $r2, 2, 14, $noreg :: (store 2 into %ir.a.addr.06)
$lr = tMOVr killed $r0, 14, $noreg
renamable $lr = t2LoopDec killed renamable $lr, 1
$r0 = tMOVr $lr, 14, $noreg
tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.5)
tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.4)
tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.3)
t2LoopEnd killed renamable $lr, %bb.2
tB %bb.3, 14, $noreg
bb.3.while.end:
$sp = tADDspi $sp, 6, 14, $noreg
tPOP_RET 14, $noreg, def $r7, def $pc
...