forked from OSchip/llvm-project
[Thumb] Fix assembler error 'cannot honor width suffix pop {lr}'
Summary: * avoid generating POP {LR} in Thumb1 epilogues * combine MOV LR, Rx + BX LR -> BX Rx in a peephole optimization pass * combine POP {LR} + B + BX LR -> POP {PC} on v5T+ Test cases by Ana Pazos Differential Revision: http://reviews.llvm.org/D15707 llvm-svn: 256523
This commit is contained in:
parent
b3c53e512f
commit
2aca0c622a
|
@ -151,6 +151,7 @@ namespace {
|
|||
bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
|
||||
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
|
||||
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
|
||||
bool CombineMovBx(MachineBasicBlock &MBB);
|
||||
};
|
||||
char ARMLoadStoreOpt::ID = 0;
|
||||
}
|
||||
|
@ -1825,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
|
||||
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
||||
if (MBBI == MBB.begin() || MBBI == MBB.end() ||
|
||||
MBBI->getOpcode() != ARM::tBX_RET)
|
||||
return false;
|
||||
|
||||
MachineBasicBlock::iterator Prev = MBBI;
|
||||
--Prev;
|
||||
if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
|
||||
return false;
|
||||
|
||||
for (auto Use : Prev->uses())
|
||||
if (Use.isKill()) {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
|
||||
.addReg(Use.getReg(), RegState::Kill))
|
||||
.copyImplicitOps(&*MBBI);
|
||||
MBB.erase(MBBI);
|
||||
MBB.erase(Prev);
|
||||
return true;
|
||||
}
|
||||
|
||||
llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
|
||||
}
|
||||
|
||||
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
||||
MF = &Fn;
|
||||
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
|
||||
|
@ -1844,6 +1869,8 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
|||
Modified |= LoadStoreMultipleOpti(MBB);
|
||||
if (STI->hasV5TOps())
|
||||
Modified |= MergeReturnIntoLDM(MBB);
|
||||
if (isThumb1)
|
||||
Modified |= CombineMovBx(MBB);
|
||||
}
|
||||
|
||||
Allocator.DestroyAll();
|
||||
|
|
|
@ -433,14 +433,16 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
|||
auto MBBI = MBB.getFirstTerminator();
|
||||
bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
|
||||
if (CanRestoreDirectly) {
|
||||
if (MBBI != MBB.end())
|
||||
if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
|
||||
CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
|
||||
MBBI->getOpcode() == ARM::tPOP_RET);
|
||||
else {
|
||||
assert(MBB.back().getOpcode() == ARM::tPOP);
|
||||
auto MBBI_prev = MBBI;
|
||||
MBBI_prev--;
|
||||
assert(MBBI_prev->getOpcode() == ARM::tPOP);
|
||||
assert(MBB.succ_size() == 1);
|
||||
if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
|
||||
MBBI--; // Replace the final tPOP with a tPOP_RET.
|
||||
MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
|
||||
else
|
||||
CanRestoreDirectly = false;
|
||||
}
|
||||
|
@ -454,8 +456,7 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
|||
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
|
||||
// Copy implicit ops and popped registers, if any.
|
||||
for (auto MO: MBBI->operands())
|
||||
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
|
||||
MO.getReg() != ARM::LR)
|
||||
if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
|
||||
MIB.addOperand(MO);
|
||||
MIB.addReg(ARM::PC, RegState::Define);
|
||||
// Erase the old instruction (tBX_RET or tPOP).
|
||||
|
@ -529,32 +530,26 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
|||
.addReg(PopReg, RegState::Kill));
|
||||
}
|
||||
|
||||
bool AddBx = false;
|
||||
if (MBBI == MBB.end()) {
|
||||
MachineInstr& Pop = MBB.back();
|
||||
assert(Pop.getOpcode() == ARM::tPOP);
|
||||
Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR));
|
||||
} else if (MBBI->getOpcode() == ARM::tPOP_RET) {
|
||||
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
|
||||
// We couldn't use the direct restoration above, so
|
||||
// perform the opposite conversion: tPOP_RET to tPOP.
|
||||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(
|
||||
BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
|
||||
unsigned Popped = 0;
|
||||
bool Popped = false;
|
||||
for (auto MO: MBBI->operands())
|
||||
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
|
||||
MO.getReg() != ARM::PC) {
|
||||
MIB.addOperand(MO);
|
||||
if (!MO.isImplicit())
|
||||
Popped++;
|
||||
Popped = true;
|
||||
}
|
||||
// Is there anything left to pop?
|
||||
if (!Popped)
|
||||
MBB.erase(MIB.getInstr());
|
||||
// Erase the old instruction.
|
||||
MBB.erase(MBBI);
|
||||
MBBI = MBB.end();
|
||||
AddBx = true;
|
||||
MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
|
||||
}
|
||||
|
||||
assert(PopReg && "Do not know how to get LR");
|
||||
|
@ -563,31 +558,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
|
|||
|
||||
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
|
||||
|
||||
if (!TemporaryReg && MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET) {
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
|
||||
.addReg(PopReg, RegState::Kill);
|
||||
AddDefaultPred(MIB);
|
||||
MIB.copyImplicitOps(&*MBBI);
|
||||
// erase the old tBX_RET instruction
|
||||
MBB.erase(MBBI);
|
||||
return true;
|
||||
}
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
|
||||
.addReg(ARM::LR, RegState::Define)
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
|
||||
if (AddBx && !TemporaryReg) {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
} else {
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
|
||||
.addReg(ARM::LR, RegState::Define)
|
||||
.addReg(PopReg, RegState::Kill));
|
||||
}
|
||||
if (TemporaryReg) {
|
||||
if (TemporaryReg)
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
|
||||
.addReg(PopReg, RegState::Define)
|
||||
.addReg(TemporaryReg, RegState::Kill));
|
||||
if (AddBx)
|
||||
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET)));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -645,28 +623,34 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
|||
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
|
||||
AddDefaultPred(MIB);
|
||||
|
||||
bool NumRegs = false;
|
||||
bool NeedsPop = false;
|
||||
for (unsigned i = CSI.size(); i != 0; --i) {
|
||||
unsigned Reg = CSI[i-1].getReg();
|
||||
if (Reg == ARM::LR && MBB.succ_empty()) {
|
||||
// Special epilogue for vararg functions. See emitEpilogue
|
||||
if (isVarArg)
|
||||
if (Reg == ARM::LR) {
|
||||
if (MBB.succ_empty()) {
|
||||
// Special epilogue for vararg functions. See emitEpilogue
|
||||
if (isVarArg)
|
||||
continue;
|
||||
// ARMv4T requires BX, see emitEpilogue
|
||||
if (!STI.hasV5TOps())
|
||||
continue;
|
||||
Reg = ARM::PC;
|
||||
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
|
||||
if (MI != MBB.end())
|
||||
MIB.copyImplicitOps(&*MI);
|
||||
MI = MBB.erase(MI);
|
||||
} else
|
||||
// LR may only be popped into PC, as part of return sequence.
|
||||
// If this isn't the return sequence, we'll need emitPopSpecialFixUp
|
||||
// to restore LR the hard way.
|
||||
continue;
|
||||
// ARMv4T requires BX, see emitEpilogue
|
||||
if (!STI.hasV5TOps())
|
||||
continue;
|
||||
Reg = ARM::PC;
|
||||
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
|
||||
if (MI != MBB.end())
|
||||
MIB.copyImplicitOps(&*MI);
|
||||
MI = MBB.erase(MI);
|
||||
}
|
||||
MIB.addReg(Reg, getDefRegState(true));
|
||||
NumRegs = true;
|
||||
NeedsPop = true;
|
||||
}
|
||||
|
||||
// It's illegal to emit pop instruction without operands.
|
||||
if (NumRegs)
|
||||
if (NeedsPop)
|
||||
MBB.insert(MI, &*MIB);
|
||||
else
|
||||
MF.DeleteMachineInstr(MIB);
|
||||
|
|
|
@ -159,6 +159,7 @@ declare i32 @doSomething(i32, i32*)
|
|||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
|
@ -270,7 +271,10 @@ for.end: ; preds = %for.body
|
|||
; Next BB.
|
||||
; SUM << 3.
|
||||
; CHECK: lsls [[SUM]], [[SUM]], #3
|
||||
; ENABLE-NEXT: pop {r4, lr}
|
||||
; ENABLE-V5T-NEXT: pop {r4, pc}
|
||||
; ENABLE-V4T-NEXT: pop {r4}
|
||||
; ENABLE-V4T-NEXT: pop {r1}
|
||||
; ENABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE-V5T: pop {r4, pc}
|
||||
|
@ -285,6 +289,7 @@ for.end: ; preds = %for.body
|
|||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
|
@ -350,7 +355,10 @@ declare void @somethingElse(...)
|
|||
; Next BB.
|
||||
; SUM << 3.
|
||||
; CHECK: lsls [[SUM]], [[SUM]], #3
|
||||
; ENABLE: pop {r4, lr}
|
||||
; ENABLE-V5T-NEXT: pop {r4, pc}
|
||||
; ENABLE-V4T-NEXT: pop {r4}
|
||||
; ENABLE-V4T-NEXT: pop {r1}
|
||||
; ENABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE-V5T: pop {r4, pc}
|
||||
|
@ -365,6 +373,7 @@ declare void @somethingElse(...)
|
|||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
|
||||
entry:
|
||||
|
@ -431,7 +440,10 @@ entry:
|
|||
;
|
||||
; Next BB.
|
||||
; CHECK: movs r0, #0
|
||||
; ENABLE-NEXT: pop {r4, lr}
|
||||
; ENABLE-V5T-NEXT: pop {r4, pc}
|
||||
; ENABLE-V4T-NEXT: pop {r4}
|
||||
; ENABLE-V4T-NEXT: pop {r1}
|
||||
; ENABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE-V5T-NEXT: pop {r4, pc}
|
||||
|
@ -446,6 +458,7 @@ entry:
|
|||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
|
||||
; ENABLE-NEXT: bx lr
|
||||
define i32 @inlineAsm(i32 %cond, i32 %N) {
|
||||
entry:
|
||||
|
@ -506,7 +519,10 @@ if.end: ; preds = %for.body, %if.else
|
|||
; CHECK-NEXT: lsls r0, r0, #3
|
||||
;
|
||||
; ENABLE-NEXT: add sp, #16
|
||||
; ENABLE-NEXT: pop {[[TMP]], lr}
|
||||
; ENABLE-V5T-NEXT: pop {[[TMP]], pc}
|
||||
; ENABLE-V4T-NEXT: pop {[[TMP]]}
|
||||
; ENABLE-V4T-NEXT: pop {r1}
|
||||
; ENABLE-V4T-NEXT: bx r1
|
||||
;
|
||||
; Duplicated epilogue.
|
||||
; DISABLE-V5T-NEXT: add sp, #16
|
||||
|
@ -518,6 +534,7 @@ if.end: ; preds = %for.body, %if.else
|
|||
; CHECK: lsls r0, r1, #1
|
||||
;
|
||||
; Epilogue code.
|
||||
; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
|
||||
; ENABLE-NEXT: bx lr
|
||||
;
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
|
@ -586,4 +603,89 @@ if.end:
|
|||
|
||||
declare void @abort() #0
|
||||
|
||||
define i32 @b_to_bx(i32 %value) {
|
||||
; CHECK-LABEL: b_to_bx:
|
||||
; DISABLE: push {r7, lr}
|
||||
; CHECK: cmp r1, #49
|
||||
; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
; ENABLE: push {r7, lr}
|
||||
|
||||
; CHECK: bl
|
||||
; DISABLE-V5-NEXT: pop {r7, pc}
|
||||
; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
|
||||
|
||||
; ENABLE-V5-NEXT: pop {r7, pc}
|
||||
; ENABLE-V4-NEXT: pop {r7}
|
||||
; ENABLE-V4-NEXT: pop {r1}
|
||||
; ENABLE-V4-NEXT: bx r1
|
||||
|
||||
; CHECK: [[ELSE_LABEL]]: @ %if.else
|
||||
; CHECK-NEXT: lsls r0, r1, #1
|
||||
; DISABLE-V5-NEXT: pop {r7, pc}
|
||||
; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
|
||||
; DISABLE-V4T-NEXT: pop {r7}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
|
||||
; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
|
||||
; ENABLE-NEXT: bx lr
|
||||
|
||||
entry:
|
||||
%cmp = icmp slt i32 %value, 50
|
||||
br i1 %cmp, label %if.then, label %if.else
|
||||
|
||||
if.then:
|
||||
%div = sdiv i32 5000, %value
|
||||
br label %if.end
|
||||
|
||||
if.else:
|
||||
%mul = shl nsw i32 %value, 1
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%value.addr.0 = phi i32 [ %div, %if.then ], [ %mul, %if.else ]
|
||||
ret i32 %value.addr.0
|
||||
}
|
||||
|
||||
define i1 @beq_to_bx(i32* %y, i32 %head) {
|
||||
; CHECK-LABEL: beq_to_bx:
|
||||
; DISABLE: push {r4, lr}
|
||||
; CHECK: cmp r2, #0
|
||||
; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]]
|
||||
; ENABLE: push {r4, lr}
|
||||
|
||||
; CHECK: tst r3, r4
|
||||
; ENABLE-NEXT: pop {r4}
|
||||
; ENABLE-NEXT: pop {r3}
|
||||
; ENABLE-NEXT: mov lr, r3
|
||||
; CHECK-NEXT: beq [[EXIT_LABEL]]
|
||||
|
||||
; CHECK: str r1, [r2]
|
||||
; CHECK-NEXT: movs r0, #0
|
||||
; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup
|
||||
; ENABLE-NEXT: bx lr
|
||||
; DISABLE-V5-NEXT: pop {r4, pc}
|
||||
; DISABLE-V4T-NEXT: pop {r4}
|
||||
; DISABLE-V4T-NEXT: pop {r1}
|
||||
; DISABLE-V4T-NEXT: bx r1
|
||||
|
||||
entry:
|
||||
%cmp = icmp eq i32* %y, null
|
||||
br i1 %cmp, label %cleanup, label %if.end
|
||||
|
||||
if.end:
|
||||
%z = load i32, i32* %y, align 4
|
||||
%and = and i32 %z, 2
|
||||
%cmp2 = icmp eq i32 %and, 0
|
||||
br i1 %cmp2, label %cleanup, label %if.end4
|
||||
|
||||
if.end4:
|
||||
store i32 %head, i32* %y, align 4
|
||||
br label %cleanup
|
||||
|
||||
cleanup:
|
||||
%retval.0 = phi i1 [ 0, %if.end4 ], [ 1, %entry ], [ 1, %if.end ]
|
||||
ret i1 %retval.0
|
||||
}
|
||||
|
||||
attributes #0 = { noreturn nounwind }
|
||||
|
|
Loading…
Reference in New Issue