[ARM] Revert WhileLoopStartLR to DoLoopStart

If a WhileLoopStartLR is reverted due to calls in the preheader, we may
still be able to instead create a DoLoopStart, preserving the low
overhead loop. This adds code for that, only reverting the
WhileLoopStartLR to a Br/Cmp, leaving the rest of the low overhead loop
in place.

Differential Revision: https://reviews.llvm.org/D98413
This commit is contained in:
David Green 2021-03-25 16:44:15 +00:00
parent c40cea6f08
commit d97189600e
3 changed files with 52 additions and 22 deletions

View File

@ -273,11 +273,28 @@ bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
// Check if there is an illegal instruction (a call) in the low overhead loop
// and if so revert it now before we get any further. While loops also need to
// check the preheaders.
SmallPtrSet<MachineBasicBlock *, 4> MBBs(ML->block_begin(), ML->block_end());
if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR)
MBBs.insert(ML->getHeader()->pred_begin(), ML->getHeader()->pred_end());
for (MachineBasicBlock *MBB : MBBs) {
// check the preheaders, but can be reverted to a DLS loop if needed.
auto *PreHeader = ML->getLoopPreheader();
if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader &&
LoopStart->getParent() != PreHeader) {
for (MachineInstr &MI : *PreHeader) {
if (MI.isCall()) {
// Create a t2DoLoopStart at the end of the preheader.
MachineInstrBuilder MIB =
BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
MIB.add(LoopStart->getOperand(0));
MIB.add(LoopStart->getOperand(1));
// Revert the t2WhileLoopStartLR to a CMP and Br.
RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
LoopStart = MIB;
break;
}
}
}
for (MachineBasicBlock *MBB : ML->blocks()) {
for (MachineInstr &MI : *MBB) {
if (MI.isCall()) {
LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);

View File

@ -77,24 +77,38 @@ static inline bool isLoopStart(MachineInstr &MI) {
// WhileLoopStart holds the exit block, so produce a subs Op0, Op1, 0 and then a
// beq that branches to the exit block.
// If UseCmp is true, this will create a t2CMPri instead of a t2SUBri, meaning
// the value of LR into the loop will not be set up. This is used if the LR
// setup is done via another means (via a t2DoLoopStart, for example).
inline void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII,
unsigned BrOpc = ARM::t2Bcc) {
unsigned BrOpc = ARM::t2Bcc,
bool UseCmp = false) {
MachineBasicBlock *MBB = MI->getParent();
assert(MI->getOpcode() == ARM::t2WhileLoopStartLR &&
"Only expected a t2WhileLoopStartLR in RevertWhileLoopStartLR!");
// Subs
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
MIB.add(MI->getOperand(0));
MIB.add(MI->getOperand(1));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
MIB.addReg(ARM::CPSR, RegState::Define);
// Subs/Cmp
if (UseCmp) {
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
MIB.add(MI->getOperand(1));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
} else {
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
MIB.add(MI->getOperand(0));
MIB.add(MI->getOperand(1));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
MIB.addReg(ARM::CPSR, RegState::Define);
}
// Branch
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(2)); // branch target
MIB.addImm(ARMCC::EQ); // condition code
MIB.addReg(ARM::CPSR);

View File

@ -322,21 +322,20 @@ define void @callinpreheader(i32* noalias nocapture readonly %pAngle, i32* nocap
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: subs r6, r2, #0
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: beq .LBB3_3
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: cbz r2, .LBB3_3
; CHECK-NEXT: @ %bb.1: @ %for.body.ph
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: bl callee
; CHECK-NEXT: dls lr, r6
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB3_2: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r1, [r5], #4
; CHECK-NEXT: subs r6, #1
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: cbz r6, .LBB3_3
; CHECK-NEXT: le .LBB3_2
; CHECK-NEXT: le lr, .LBB3_2
; CHECK-NEXT: .LBB3_3: @ %for.cond.cleanup
; CHECK-NEXT: str r0, [r4]
; CHECK-NEXT: pop {r4, r5, r6, pc}