forked from OSchip/llvm-project
[ARM] Updates to arm-block-placement pass
The patch makes two updates to the arm-block-placement pass: (1) it handles arbitrarily nested loops, and (2) it extends the search (for t2WhileLoopStartLR) to the predecessor of the preheader. Differential Revision: https://reviews.llvm.org/D99649
This commit is contained in:
parent
489cdedd11
commit
58f3201a20
|
@ -38,6 +38,8 @@ public:
|
|||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
|
||||
bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
|
||||
bool fixBackwardsWLS(MachineLoop *ML);
|
||||
bool processPostOrderLoops(MachineLoop *ML);
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
|
@ -57,9 +59,135 @@ char ARMBlockPlacement::ID = 0;
|
|||
INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false,
|
||||
false)
|
||||
|
||||
/// Scans the terminators of \p MBB in order and returns a pointer to the first
/// one whose opcode is ARM::t2WhileLoopStartLR, or nullptr if there is none.
static MachineInstr *findWLSInBlock(MachineBasicBlock *MBB) {
  MachineInstr *Found = nullptr;
  for (auto &Candidate : MBB->terminators()) {
    if (Candidate.getOpcode() != ARM::t2WhileLoopStartLR)
      continue;
    // First match wins; no need to look at the remaining terminators.
    Found = &Candidate;
    break;
  }
  return Found;
}
|
||||
|
||||
/// Find t2WhileLoopStartLR in the loop predecessor BB or otherwise in its only
|
||||
/// predecessor. If found, returns (BB, WLS Instr) pair, otherwise a null pair.
|
||||
static MachineInstr *findWLS(MachineLoop *ML) {
|
||||
MachineBasicBlock *Predecessor = ML->getLoopPredecessor();
|
||||
if (!Predecessor)
|
||||
return nullptr;
|
||||
MachineInstr *WlsInstr = findWLSInBlock(Predecessor);
|
||||
if (WlsInstr)
|
||||
return WlsInstr;
|
||||
if (Predecessor->pred_size() == 1)
|
||||
return findWLSInBlock(*Predecessor->pred_begin());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Checks if loop has a backwards branching WLS, and if possible, fixes it.
|
||||
/// This requires checking the preheader (or it's predecessor) for a WLS and if
|
||||
/// its target is before it.
|
||||
/// If moving the target block wouldn't produce another backwards WLS or a new
|
||||
/// forwards LE branch, then move the target block after the preheader (or it's
|
||||
/// predecessor).
|
||||
bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
|
||||
MachineInstr *WlsInstr = findWLS(ML);
|
||||
if (!WlsInstr)
|
||||
return false;
|
||||
|
||||
MachineBasicBlock *Predecessor = WlsInstr->getParent();
|
||||
MachineBasicBlock *LoopExit = WlsInstr->getOperand(2).getMBB();
|
||||
// We don't want to move the function's entry block.
|
||||
if (!LoopExit->getPrevNode())
|
||||
return false;
|
||||
if (blockIsBefore(Predecessor, LoopExit))
|
||||
return false;
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
|
||||
<< Predecessor->getFullName() << " to "
|
||||
<< LoopExit->getFullName() << "\n");
|
||||
|
||||
// Make sure that moving the target block doesn't cause any of its WLSs
|
||||
// that were previously not backwards to become backwards
|
||||
bool CanMove = true;
|
||||
MachineInstr *WlsInLoopExit = findWLSInBlock(LoopExit);
|
||||
if (WlsInLoopExit) {
|
||||
// An example loop structure where the LoopExit can't be moved, since
|
||||
// bb1's WLS will become backwards once it's moved after bb3
|
||||
// bb1: - LoopExit
|
||||
// WLS bb2
|
||||
// bb2: - LoopExit2
|
||||
// ...
|
||||
// bb3: - Predecessor
|
||||
// WLS bb1
|
||||
// bb4: - Header
|
||||
MachineBasicBlock *LoopExit2 = WlsInLoopExit->getOperand(2).getMBB();
|
||||
// If the WLS from LoopExit to LoopExit2 is already backwards then
|
||||
// moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
|
||||
// after the Predecessor then moving will keep it as a forward branch, so it
|
||||
// can be moved. If LoopExit2 is between the Predecessor and LoopExit then
|
||||
// moving LoopExit will make it a backwards branch, so it can't be moved
|
||||
// since we'd fix one and introduce one backwards branch.
|
||||
// TODO: Analyse the blocks to make a decision if it would be worth
|
||||
// moving LoopExit even if LoopExit2 is between the Predecessor and
|
||||
// LoopExit.
|
||||
if (!blockIsBefore(LoopExit2, LoopExit) &&
|
||||
(LoopExit2 == Predecessor || blockIsBefore(LoopExit2, Predecessor))) {
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX
|
||||
<< "Can't move the target block as it would "
|
||||
"introduce a new backwards WLS branch\n");
|
||||
CanMove = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (CanMove) {
|
||||
// Make sure no LEs become forwards.
|
||||
// An example loop structure where the LoopExit can't be moved, since
|
||||
// bb2's LE will become forwards once bb1 is moved after bb3.
|
||||
// bb1: - LoopExit
|
||||
// bb2:
|
||||
// LE bb1 - Terminator
|
||||
// bb3: - Predecessor
|
||||
// WLS bb1
|
||||
// bb4: - Header
|
||||
for (auto It = LoopExit->getIterator(); It != Predecessor->getIterator();
|
||||
It++) {
|
||||
MachineBasicBlock *MBB = &*It;
|
||||
for (auto &Terminator : MBB->terminators()) {
|
||||
if (Terminator.getOpcode() != ARM::t2LoopEnd &&
|
||||
Terminator.getOpcode() != ARM::t2LoopEndDec)
|
||||
continue;
|
||||
MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
|
||||
// The LE will become forwards branching if it branches to LoopExit
|
||||
// which isn't allowed by the architecture, so we should avoid
|
||||
// introducing these.
|
||||
// TODO: Analyse the blocks to make a decision if it would be worth
|
||||
// moving LoopExit even if we'd introduce a forwards LE
|
||||
if (LETarget == LoopExit) {
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX
|
||||
<< "Can't move the target block as it would "
|
||||
"introduce a new forwards LE branch\n");
|
||||
CanMove = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (CanMove)
|
||||
moveBasicBlock(LoopExit, Predecessor);
|
||||
|
||||
return CanMove;
|
||||
}
|
||||
|
||||
/// Updates ordering (of WLS BB and their loopExits) in inner loops first
|
||||
/// Returns true if any change was made in any of the loops
|
||||
bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) {
|
||||
bool Changed = false;
|
||||
for (auto *InnerML : *ML)
|
||||
Changed |= processPostOrderLoops(InnerML);
|
||||
return Changed | fixBackwardsWLS(ML);
|
||||
}
|
||||
|
||||
bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
return false;
|
||||
const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
|
||||
if (!ST.hasLOB())
|
||||
return false;
|
||||
|
@ -72,109 +200,9 @@ bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
|||
BBUtils->adjustBBOffsetsAfter(&MF.front());
|
||||
bool Changed = false;
|
||||
|
||||
// Find loops with a backwards branching WLS.
|
||||
// This requires looping over the loops in the function, checking each
|
||||
// preheader for a WLS and if its target is before the preheader. If moving
|
||||
// the target block wouldn't produce another backwards WLS or a new forwards
|
||||
// LE branch then move the target block after the preheader.
|
||||
for (auto *ML : *MLI) {
|
||||
MachineBasicBlock *Preheader = ML->getLoopPredecessor();
|
||||
if (!Preheader)
|
||||
continue;
|
||||
|
||||
for (auto &Terminator : Preheader->terminators()) {
|
||||
if (Terminator.getOpcode() != ARM::t2WhileLoopStartLR)
|
||||
continue;
|
||||
MachineBasicBlock *LoopExit = Terminator.getOperand(2).getMBB();
|
||||
// We don't want to move the function's entry block.
|
||||
if (!LoopExit->getPrevNode())
|
||||
continue;
|
||||
if (blockIsBefore(Preheader, LoopExit))
|
||||
continue;
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
|
||||
<< Preheader->getFullName() << " to "
|
||||
<< LoopExit->getFullName() << "\n");
|
||||
|
||||
// Make sure that moving the target block doesn't cause any of its WLSs
|
||||
// that were previously not backwards to become backwards
|
||||
bool CanMove = true;
|
||||
for (auto &LoopExitTerminator : LoopExit->terminators()) {
|
||||
if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStartLR)
|
||||
continue;
|
||||
// An example loop structure where the LoopExit can't be moved, since
|
||||
// bb1's WLS will become backwards once it's moved after bb3 bb1: -
|
||||
// LoopExit
|
||||
// WLS bb2 - LoopExit2
|
||||
// bb2:
|
||||
// ...
|
||||
// bb3: - Preheader
|
||||
// WLS bb1
|
||||
// bb4: - Header
|
||||
MachineBasicBlock *LoopExit2 =
|
||||
LoopExitTerminator.getOperand(2).getMBB();
|
||||
// If the WLS from LoopExit to LoopExit2 is already backwards then
|
||||
// moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
|
||||
// after the Preheader then moving will keep it as a forward branch, so
|
||||
// it can be moved. If LoopExit2 is between the Preheader and LoopExit
|
||||
// then moving LoopExit will make it a backwards branch, so it can't be
|
||||
// moved since we'd fix one and introduce one backwards branch.
|
||||
// TODO: Analyse the blocks to make a decision if it would be worth
|
||||
// moving LoopExit even if LoopExit2 is between the Preheader and
|
||||
// LoopExit.
|
||||
if (!blockIsBefore(LoopExit2, LoopExit) &&
|
||||
(LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) {
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX
|
||||
<< "Can't move the target block as it would "
|
||||
"introduce a new backwards WLS branch\n");
|
||||
CanMove = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (CanMove) {
|
||||
// Make sure no LEs become forwards.
|
||||
// An example loop structure where the LoopExit can't be moved, since
|
||||
// bb2's LE will become forwards once bb1 is moved after bb3.
|
||||
// bb1: - LoopExit
|
||||
// bb2:
|
||||
// LE bb1 - Terminator
|
||||
// bb3: - Preheader
|
||||
// WLS bb1
|
||||
// bb4: - Header
|
||||
for (auto It = LoopExit->getIterator(); It != Preheader->getIterator();
|
||||
It++) {
|
||||
MachineBasicBlock *MBB = &*It;
|
||||
for (auto &Terminator : MBB->terminators()) {
|
||||
if (Terminator.getOpcode() != ARM::t2LoopEnd &&
|
||||
Terminator.getOpcode() != ARM::t2LoopEndDec)
|
||||
continue;
|
||||
MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
|
||||
// The LE will become forwards branching if it branches to LoopExit
|
||||
// which isn't allowed by the architecture, so we should avoid
|
||||
// introducing these.
|
||||
// TODO: Analyse the blocks to make a decision if it would be worth
|
||||
// moving LoopExit even if we'd introduce a forwards LE
|
||||
if (LETarget == LoopExit) {
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX
|
||||
<< "Can't move the target block as it would "
|
||||
"introduce a new forwards LE branch\n");
|
||||
CanMove = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!CanMove)
|
||||
break;
|
||||
}
|
||||
|
||||
if (CanMove) {
|
||||
moveBasicBlock(LoopExit, Preheader);
|
||||
Changed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Find loops with a backwards branching WLS and fix if possible.
|
||||
for (auto *ML : *MLI)
|
||||
Changed |= processPostOrderLoops(ML);
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
@ -184,6 +212,8 @@ bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
|
|||
return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
|
||||
}
|
||||
|
||||
/// Moves a given MBB to be positioned after another MBB while maintaining
|
||||
/// existing control flow
|
||||
void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
|
||||
MachineBasicBlock *After) {
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
|
||||
|
@ -195,6 +225,9 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
|
|||
|
||||
BB->moveAfter(After);
|
||||
|
||||
// Since only the blocks are to be moved around (but the control flow must
|
||||
// not change), if there were any fall-throughs (to/from adjacent blocks),
|
||||
// replace with unconditional branch to the fall through block.
|
||||
auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) {
|
||||
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from "
|
||||
<< From->getName() << " to " << To->getName() << "\n");
|
||||
|
|
|
@ -1,16 +1,19 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s
|
||||
--- |
|
||||
; Checks that loopExitBlock gets moved (in forward direction) if there is a backwards WLS to it.
|
||||
define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Checks that loopExitBlock does not get reordered (since it is entry block) even if there is a backwards WLS to it.
|
||||
define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Checks that loopExitBlock (containing a backwards WLS) is moved (in forward direction) if there is a backwards WLS to it.
|
||||
define void @backwards_branch_target_already_backwards(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
unreachable
|
||||
|
@ -21,16 +24,25 @@
|
|||
unreachable
|
||||
}
|
||||
|
||||
; Checks that loopExitBlock (to which a backwards LE exists) is not moved if moving it would cause the LE to become forwards branching.
|
||||
define void @backwards_branch_forwards_le(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Checks that a MachineFunction is unaffected if it doesn't contain any WLS (pseudo) instruction.
|
||||
define void @no_preheader(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
unreachable
|
||||
}
|
||||
|
||||
; Within a nested loop, checks that loopExit gets moved (in forward direction) if there exists a backwards WLS to it.
|
||||
; Both the WLS and loopExit are at depth=3.
|
||||
define void @nested_loops(i32 %n, i32 %m, i32 %l, i8* noalias %X, i8* noalias %Y) local_unnamed_addr #0 {
|
||||
entry:
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare dso_local i32 @g(...) local_unnamed_addr #1
|
||||
|
||||
declare dso_local i32 @h(...) local_unnamed_addr #1
|
||||
|
@ -441,3 +453,188 @@ body: |
|
|||
bb.5:
|
||||
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r7, def $pc
|
||||
...
|
||||
---
|
||||
name: nested_loops
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$r0' }
|
||||
- { reg: '$r1' }
|
||||
- { reg: '$r2' }
|
||||
- { reg: '$r3' }
|
||||
frameInfo:
|
||||
stackSize: 32
|
||||
maxAlignment: 4
|
||||
maxCallFrameSize: 0
|
||||
fixedStack:
|
||||
- { id: 0, size: 4, alignment: 8, isImmutable: true }
|
||||
stack:
|
||||
- { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr',
|
||||
callee-saved-restored: false }
|
||||
- { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r10' }
|
||||
- { id: 2, type: spill-slot, offset: -12, size: 4, alignment: 4, callee-saved-register: '$r9' }
|
||||
- { id: 3, type: spill-slot, offset: -16, size: 4, alignment: 4, callee-saved-register: '$r8' }
|
||||
- { id: 4, type: spill-slot, offset: -20, size: 4, alignment: 4, callee-saved-register: '$r7' }
|
||||
- { id: 5, type: spill-slot, offset: -24, size: 4, alignment: 4, callee-saved-register: '$r6' }
|
||||
- { id: 6, type: spill-slot, offset: -28, size: 4, alignment: 4, callee-saved-register: '$r5' }
|
||||
- { id: 7, type: spill-slot, offset: -32, size: 4, alignment: 4, callee-saved-register: '$r4' }
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
; CHECK-LABEL: name: nested_loops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $lr
|
||||
; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $lr
|
||||
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 32
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -8
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -12
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -16
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -20
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -24
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -28
|
||||
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -32
|
||||
; CHECK: tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
; CHECK: t2IT 11, 8, implicit-def $itstate
|
||||
; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 11 /* CC::lt */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc, implicit killed $itstate
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: liveins: $r0, $r1, $r2, $r3
|
||||
; CHECK: renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
|
||||
; CHECK: $r9 = tMOVr killed $r2, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.9(0x04000000), %bb.3(0x7c000000)
|
||||
; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12
|
||||
; CHECK: renamable $r8 = nuw nsw t2ADDri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
|
||||
; CHECK: tCMPhir renamable $r8, renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.4(0x50000000), %bb.2(0x30000000)
|
||||
; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12
|
||||
; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, killed $cpsr
|
||||
; CHECK: bb.4:
|
||||
; CHECK: successors: %bb.6(0x80000000)
|
||||
; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12
|
||||
; CHECK: renamable $r4, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
|
||||
; CHECK: $r10 = tMOVr $r12, 14 /* CC::al */, $noreg
|
||||
; CHECK: $r2 = tMOVr $r3, 14 /* CC::al */, $noreg
|
||||
; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.6:
|
||||
; CHECK: successors: %bb.7(0x50000000), %bb.5(0x30000000)
|
||||
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
|
||||
; CHECK: renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.5, implicit-def dead $cpsr
|
||||
; CHECK: tB %bb.7, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.5:
|
||||
; CHECK: successors: %bb.2(0x04000000), %bb.6(0x7c000000)
|
||||
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
|
||||
; CHECK: renamable $r4, dead $cpsr = nuw nsw tADDi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 1, 14 /* CC::al */, $noreg
|
||||
; CHECK: tCMPr renamable $r4, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
; CHECK: renamable $r10 = t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr
|
||||
; CHECK: tB %bb.6, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.7:
|
||||
; CHECK: successors: %bb.8(0x80000000)
|
||||
; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
|
||||
; CHECK: $r5 = tMOVr $r10, 14 /* CC::al */, $noreg
|
||||
; CHECK: $r6 = tMOVr $r2, 14 /* CC::al */, $noreg
|
||||
; CHECK: t2B %bb.8, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.8:
|
||||
; CHECK: successors: %bb.8(0x7c000000), %bb.5(0x04000000)
|
||||
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r8, $r9, $r10, $r12
|
||||
; CHECK: tSTRi killed $r0, $r1, 0, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.8, implicit-def dead $cpsr
|
||||
; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg
|
||||
; CHECK: bb.9:
|
||||
; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $lr
|
||||
|
||||
$sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $lr
|
||||
frame-setup CFI_INSTRUCTION def_cfa_offset 32
|
||||
frame-setup CFI_INSTRUCTION offset $lr, -4
|
||||
frame-setup CFI_INSTRUCTION offset $r10, -8
|
||||
frame-setup CFI_INSTRUCTION offset $r9, -12
|
||||
frame-setup CFI_INSTRUCTION offset $r8, -16
|
||||
frame-setup CFI_INSTRUCTION offset $r7, -20
|
||||
frame-setup CFI_INSTRUCTION offset $r6, -24
|
||||
frame-setup CFI_INSTRUCTION offset $r5, -28
|
||||
frame-setup CFI_INSTRUCTION offset $r4, -32
|
||||
tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
t2IT 11, 8, implicit-def $itstate
|
||||
$sp = frame-destroy t2LDMIA_RET $sp, 11 /* CC::lt */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc, implicit killed $itstate
|
||||
|
||||
bb.1:
|
||||
liveins: $r0, $r1, $r2, $r3
|
||||
|
||||
renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
|
||||
$r9 = tMOVr killed $r2, 14 /* CC::al */, $noreg
|
||||
renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
|
||||
t2B %bb.2, 14 /* CC::al */, $noreg
|
||||
|
||||
bb.8:
|
||||
successors: %bb.9(0x04000000), %bb.2(0x7c000000)
|
||||
liveins: $r0, $r1, $r3, $r8, $r9, $r12
|
||||
|
||||
renamable $r8 = nuw nsw t2ADDri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
|
||||
tCMPhir renamable $r8, renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
renamable $r12 = t2ADDri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3(0x50000000), %bb.8(0x30000000)
|
||||
liveins: $r0, $r1, $r3, $r8, $r9, $r12
|
||||
|
||||
tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
t2Bcc %bb.8, 11 /* CC::lt */, killed $cpsr
|
||||
|
||||
bb.3:
|
||||
liveins: $r0, $r1, $r3, $r8, $r9, $r12
|
||||
|
||||
renamable $r4, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
|
||||
$r10 = tMOVr $r12, 14 /* CC::al */, $noreg
|
||||
$r2 = tMOVr $r3, 14 /* CC::al */, $noreg
|
||||
t2B %bb.4, 14 /* CC::al */, $noreg
|
||||
|
||||
bb.7:
|
||||
successors: %bb.8(0x04000000), %bb.4(0x7c000000)
|
||||
liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
|
||||
|
||||
renamable $r4, dead $cpsr = nuw nsw tADDi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg
|
||||
renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 1, 14 /* CC::al */, $noreg
|
||||
tCMPr renamable $r4, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
||||
renamable $r10 = t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg
|
||||
t2Bcc %bb.8, 0 /* CC::eq */, killed $cpsr
|
||||
|
||||
bb.4:
|
||||
successors: %bb.5(0x50000000), %bb.7(0x30000000)
|
||||
liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
|
||||
|
||||
renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.7, implicit-def dead $cpsr
|
||||
|
||||
bb.5:
|
||||
liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
|
||||
|
||||
$r5 = tMOVr $r10, 14 /* CC::al */, $noreg
|
||||
$r6 = tMOVr $r2, 14 /* CC::al */, $noreg
|
||||
t2B %bb.6, 14 /* CC::al */, $noreg
|
||||
|
||||
bb.6:
|
||||
successors: %bb.6(0x7c000000), %bb.7(0x04000000)
|
||||
liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r8, $r9, $r10, $r12
|
||||
|
||||
tSTRi killed $r0, $r1, 0, 14 /* CC::al */, $noreg
|
||||
renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr
|
||||
t2B %bb.7, 14 /* CC::al */, $noreg
|
||||
|
||||
bb.9:
|
||||
$sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc
|
||||
|
||||
...
|
||||
|
|
|
@ -1077,18 +1077,10 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
|
|||
; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
|
||||
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
|
||||
; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
|
||||
; CHECK-NEXT: b .LBB16_4
|
||||
; CHECK-NEXT: .LBB16_3: @ %while.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: subs.w r12, r12, #1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #8
|
||||
; CHECK-NEXT: add.w r0, r5, r0, lsl #1
|
||||
; CHECK-NEXT: add.w r5, r0, #8
|
||||
; CHECK-NEXT: beq.w .LBB16_12
|
||||
; CHECK-NEXT: .LBB16_4: @ %while.body
|
||||
; CHECK-NEXT: b .LBB16_3
|
||||
; CHECK-NEXT: .LBB16_3: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB16_6 Depth 2
|
||||
; CHECK-NEXT: @ Child Loop BB16_5 Depth 2
|
||||
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrh.w lr, [r3, #14]
|
||||
|
@ -1125,14 +1117,14 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
|
|||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, lr
|
||||
; CHECK-NEXT: cmp r0, #16
|
||||
; CHECK-NEXT: blo .LBB16_7
|
||||
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: blo .LBB16_6
|
||||
; CHECK-NEXT: @ %bb.4: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
|
||||
; CHECK-NEXT: dls lr, r0
|
||||
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: .LBB16_6: @ %for.body
|
||||
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
|
||||
; CHECK-NEXT: .LBB16_5: @ %for.body
|
||||
; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: ldrh r0, [r6], #16
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5]
|
||||
|
@ -1163,33 +1155,39 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
|
|||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
; CHECK-NEXT: adds r5, #16
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r4
|
||||
; CHECK-NEXT: le lr, .LBB16_6
|
||||
; CHECK-NEXT: b .LBB16_8
|
||||
; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: le lr, .LBB16_5
|
||||
; CHECK-NEXT: b .LBB16_7
|
||||
; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: .LBB16_8: @ %for.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: .LBB16_7: @ %for.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: subs.w lr, r0, #0
|
||||
; CHECK-NEXT: beq.w .LBB16_3
|
||||
; CHECK-NEXT: wls lr, r0, .LBB16_8
|
||||
; CHECK-NEXT: b .LBB16_9
|
||||
; CHECK-NEXT: .LBB16_8: @ %while.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: subs.w r12, r12, #1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #8
|
||||
; CHECK-NEXT: add.w r0, r5, r0, lsl #1
|
||||
; CHECK-NEXT: add.w r5, r0, #8
|
||||
; CHECK-NEXT: beq .LBB16_12
|
||||
; CHECK-NEXT: b .LBB16_3
|
||||
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: mov r0, r5
|
||||
; CHECK-NEXT: .LBB16_10: @ %while.body76
|
||||
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
|
||||
; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: ldrh r4, [r6], #2
|
||||
; CHECK-NEXT: vldrh.u16 q1, [r0], #2
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r4
|
||||
; CHECK-NEXT: subs.w lr, lr, #1
|
||||
; CHECK-NEXT: bne .LBB16_10
|
||||
; CHECK-NEXT: b .LBB16_11
|
||||
; CHECK-NEXT: .LBB16_11: @ %while.end.loopexit
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: le lr, .LBB16_10
|
||||
; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r5, r5, r0, lsl #1
|
||||
; CHECK-NEXT: b .LBB16_3
|
||||
; CHECK-NEXT: b .LBB16_8
|
||||
; CHECK-NEXT: .LBB16_12: @ %if.end
|
||||
; CHECK-NEXT: add sp, #24
|
||||
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
|
|
@ -1071,18 +1071,10 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill
|
||||
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
|
||||
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
|
||||
; CHECK-NEXT: b .LBB16_4
|
||||
; CHECK-NEXT: .LBB16_3: @ %while.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: subs.w r12, r12, #1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #16
|
||||
; CHECK-NEXT: add.w r0, r4, r0, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r0, #16
|
||||
; CHECK-NEXT: beq .LBB16_12
|
||||
; CHECK-NEXT: .LBB16_4: @ %while.body
|
||||
; CHECK-NEXT: b .LBB16_3
|
||||
; CHECK-NEXT: .LBB16_3: @ %while.body
|
||||
; CHECK-NEXT: @ =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: @ Child Loop BB16_6 Depth 2
|
||||
; CHECK-NEXT: @ Child Loop BB16_5 Depth 2
|
||||
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
|
||||
; CHECK-NEXT: add.w lr, r10, #8
|
||||
; CHECK-NEXT: vldrw.u32 q0, [r1], #16
|
||||
|
@ -1109,14 +1101,14 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: vfma.f32 q0, q3, r11
|
||||
; CHECK-NEXT: cmp r0, #16
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r8
|
||||
; CHECK-NEXT: blo .LBB16_7
|
||||
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: blo .LBB16_6
|
||||
; CHECK-NEXT: @ %bb.4: @ %for.body.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
|
||||
; CHECK-NEXT: dls lr, r0
|
||||
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: .LBB16_6: @ %for.body
|
||||
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
|
||||
; CHECK-NEXT: .LBB16_5: @ %for.body
|
||||
; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11}
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r4], #32
|
||||
|
@ -1137,34 +1129,40 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
|
|||
; CHECK-NEXT: vfma.f32 q0, q2, r11
|
||||
; CHECK-NEXT: vfma.f32 q0, q3, r9
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r1
|
||||
; CHECK-NEXT: le lr, .LBB16_6
|
||||
; CHECK-NEXT: b .LBB16_8
|
||||
; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: le lr, .LBB16_5
|
||||
; CHECK-NEXT: b .LBB16_7
|
||||
; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
|
||||
; CHECK-NEXT: .LBB16_8: @ %for.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: .LBB16_7: @ %for.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
|
||||
; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload
|
||||
; CHECK-NEXT: subs.w lr, r0, #0
|
||||
; CHECK-NEXT: beq .LBB16_3
|
||||
; CHECK-NEXT: wls lr, r0, .LBB16_8
|
||||
; CHECK-NEXT: b .LBB16_9
|
||||
; CHECK-NEXT: .LBB16_8: @ %while.end
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
|
||||
; CHECK-NEXT: subs.w r12, r12, #1
|
||||
; CHECK-NEXT: vstrb.8 q0, [r2], #16
|
||||
; CHECK-NEXT: add.w r0, r4, r0, lsl #2
|
||||
; CHECK-NEXT: add.w r4, r0, #16
|
||||
; CHECK-NEXT: beq .LBB16_12
|
||||
; CHECK-NEXT: b .LBB16_3
|
||||
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: mov r3, r4
|
||||
; CHECK-NEXT: .LBB16_10: @ %while.body76
|
||||
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
|
||||
; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: ldr r0, [r7], #4
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r3], #4
|
||||
; CHECK-NEXT: vfma.f32 q0, q1, r0
|
||||
; CHECK-NEXT: subs.w lr, lr, #1
|
||||
; CHECK-NEXT: bne .LBB16_10
|
||||
; CHECK-NEXT: b .LBB16_11
|
||||
; CHECK-NEXT: .LBB16_11: @ %while.end.loopexit
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
|
||||
; CHECK-NEXT: le lr, .LBB16_10
|
||||
; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
|
||||
; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
|
||||
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
|
||||
; CHECK-NEXT: add.w r4, r4, r0, lsl #2
|
||||
; CHECK-NEXT: b .LBB16_3
|
||||
; CHECK-NEXT: b .LBB16_8
|
||||
; CHECK-NEXT: .LBB16_12: @ %if.end
|
||||
; CHECK-NEXT: add sp, #32
|
||||
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
|
||||
|
|
Loading…
Reference in New Issue