forked from OSchip/llvm-project
[ARM][LowOverheadLoops] Revert after read/write
Currently we check whether LR is stored/loaded to/from inbetween the loop decrement and loop end pseudo instructions. There's two problems here: - It relies on all load/store instructions being labelled as such in tablegen. - Actually any use of loop decrement is troublesome because the value doesn't exist! So we need to check for any read/write of LR that occurs between the two instructions and revert if we find anything. Differential Revision: https://reviews.llvm.org/D65792 llvm-svn: 368130
This commit is contained in:
parent
cb87f3734b
commit
173de03740
|
@ -11,8 +11,7 @@
|
|||
/// The expectation is that the loop contains three pseudo instructions:
|
||||
/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
|
||||
/// form should be in the preheader, whereas the while form should be in the
|
||||
/// preheaders only predecessor. TODO: Could DoLoopStart get moved into the
|
||||
/// pre-preheader?
|
||||
/// preheaders only predecessor.
|
||||
/// - t2LoopDec - placed within in the loop body.
|
||||
/// - t2LoopEnd - the loop latch terminator.
|
||||
///
|
||||
|
@ -176,19 +175,25 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
|
|||
// faster than performing a sub,cmp,br or even subs,br.
|
||||
Revert = true;
|
||||
|
||||
if (!Dec)
|
||||
if (!Dec || End)
|
||||
continue;
|
||||
|
||||
// If we find that we load/store LR between LoopDec and LoopEnd, expect
|
||||
// that the decremented value has been spilled to the stack. Because
|
||||
// this value isn't actually going to be produced until the latch, by LE,
|
||||
// we would need to generate a real sub. The value is also likely to be
|
||||
// reloaded for use of LoopEnd - in which in case we'd need to perform
|
||||
// an add because it gets negated again by LE! The other option is to
|
||||
// then generate the other form of LE which doesn't perform the sub.
|
||||
if (MI.mayLoad() || MI.mayStore())
|
||||
Revert =
|
||||
MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR;
|
||||
// If we find that LR has been written or read between LoopDec and
|
||||
// LoopEnd, expect that the decremented value is being used else where.
|
||||
// Because this value isn't actually going to be produced until the
|
||||
// latch, by LE, we would need to generate a real sub. The value is also
|
||||
// likely to be copied/reloaded for use of LoopEnd - in which in case
|
||||
// we'd need to perform an add because it gets subtracted again by LE!
|
||||
// The other option is to then generate the other form of LE which doesn't
|
||||
// perform the sub.
|
||||
for (auto &MO : MI.operands()) {
|
||||
if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
|
||||
MO.getReg() == ARM::LR) {
|
||||
LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
|
||||
Revert = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Dec && End && Revert)
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
|
||||
|
||||
# CHECK: while.body:
|
||||
# CHECK-NOT: t2DLS
|
||||
# CHECK-NOT: t2LEUpdate
|
||||
|
||||
--- |
|
||||
define i32 @mov_between_dec_end(i32 %n) #0 {
|
||||
entry:
|
||||
%cmp6 = icmp eq i32 %n, 0
|
||||
br i1 %cmp6, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%add = add i32 %1, 0
|
||||
%2 = icmp ne i32 %1, 0
|
||||
br i1 %2, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
|
||||
ret i32 %res.0.lcssa
|
||||
}
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { "target-features"="+mve.fp" }
|
||||
attributes #1 = { noduplicate nounwind }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: mov_between_dec_end
|
||||
alignment: 1
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 16
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||
tCBZ $r0, %bb.4
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
t2DoLoopStart killed $r0
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
|
||||
renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
renamable $r4 = tMOVr $lr, 14, $noreg
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
bb.4:
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
|
@ -0,0 +1,128 @@
|
|||
# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
|
||||
|
||||
# CHECK: while.body:
|
||||
# CHECK-NOT: t2DLS
|
||||
# CHECK-NOT: t2LEUpdate
|
||||
|
||||
--- |
|
||||
define i32 @mov_between_dec_end(i32 %n) #0 {
|
||||
entry:
|
||||
%cmp6 = icmp eq i32 %n, 0
|
||||
br i1 %cmp6, label %while.end, label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
call void @llvm.set.loop.iterations.i32(i32 %n)
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body, %while.body.preheader
|
||||
%res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
|
||||
%0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
|
||||
%1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
|
||||
%add = add i32 %1, 2
|
||||
%2 = icmp ne i32 %1, 0
|
||||
br i1 %2, label %while.body, label %while.end
|
||||
|
||||
while.end: ; preds = %while.body, %entry
|
||||
%res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
|
||||
ret i32 %res.0.lcssa
|
||||
}
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare void @llvm.set.loop.iterations.i32(i32) #1
|
||||
|
||||
; Function Attrs: noduplicate nounwind
|
||||
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { "target-features"="+mve.fp" }
|
||||
attributes #1 = { noduplicate nounwind }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
...
|
||||
---
|
||||
name: mov_between_dec_end
|
||||
alignment: 1
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: false
|
||||
hasWinCFI: false
|
||||
registers: []
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 16
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 4
|
||||
adjustsStack: true
|
||||
hasCalls: true
|
||||
stackProtector: ''
|
||||
maxCallFrameSize: 0
|
||||
cvBytesOfCalleeSavedRegisters: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
localFrameSize: 0
|
||||
savePoint: ''
|
||||
restorePoint: ''
|
||||
fixedStack: []
|
||||
stack:
|
||||
- { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
- { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
|
||||
stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
|
||||
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
callSites: []
|
||||
constants: []
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.4(0x30000000), %bb.1(0x50000000)
|
||||
|
||||
frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 16
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -8
|
||||
frame-setup CFI_INSTRUCTION offset $r5, -12
|
||||
frame-setup CFI_INSTRUCTION offset $r4, -16
|
||||
tCBZ $r0, %bb.4
|
||||
|
||||
bb.1.while.body.preheader:
|
||||
successors: %bb.2(0x80000000)
|
||||
|
||||
$lr = tMOVr $r0, 14, $noreg
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
t2DoLoopStart killed $r0
|
||||
|
||||
bb.2.while.body:
|
||||
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
|
||||
|
||||
$r4 = tMOVr $lr, 14, $noreg
|
||||
renamable $lr = t2LoopDec killed renamable $lr, 1
|
||||
$lr = tMOVr $r4, 14, $noreg
|
||||
t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
|
||||
tB %bb.3, 14, $noreg
|
||||
|
||||
bb.3.while.end:
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
bb.4:
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
|
||||
$r0 = tMOVr killed $r4, 14, $noreg
|
||||
tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
|
||||
|
||||
...
|
||||
|
Loading…
Reference in New Issue