forked from OSchip/llvm-project
[ARM][MachineOutliner] Do not overestimate LR liveness in return block
The `LiveRegUnits` utility (as well as `LivePhysRegs`) considers callee-saved registers to be alive at the point after the return instruction in a block. In the ARM backend, the `LR` register is classified as callee-saved, which is not really correct (from an ARM eABI or just common sense point of view). These two conditions cause the `MachineOutliner` to overestimate the liveness of `LR`, which results in unnecessary saves/restores of `LR` around calls to outlined sequences. It also causes the `MachineVerifer` to crash in some cases, because the save instruction reads a dead `LR`, for example when the following program: int h(int, int); int f(int a, int b, int c, int d) { a = h(a + 1, b - 1); b = b + c; return 1 + (2 * a + b) * (c - d) / (a - b) * (c + d); } int g(int a, int b, int c, int d) { a = h(a - 1, b + 1); b = b + c; return 2 + (2 * a + b) * (c - d) / (a - b) * (c + d); } is compiled with `-target arm-eabi -march=armv7-m -Oz`. This patch computes the liveness of `LR` in return blocks only, while taking into account the few ARM instructions, which read `LR`, but nevertheless the register is not mentioned (explicitly or implicitly) in the instruction operands. Differential Revision: https://reviews.llvm.org/D89189
This commit is contained in:
parent
2fc704a0a5
commit
7360d6d921
|
@ -5663,6 +5663,37 @@ ARMBaseInstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
|
|||
return 0u;
|
||||
}
|
||||
|
||||
// Compute liveness of LR at the point after the interval [I, E), which
|
||||
// denotes a *backward* iteration through instructions. Used only for return
|
||||
// basic blocks, which do not end with a tail call.
|
||||
static bool isLRAvailable(const TargetRegisterInfo &TRI,
|
||||
MachineBasicBlock::reverse_iterator I,
|
||||
MachineBasicBlock::reverse_iterator E) {
|
||||
// At the end of the function LR dead.
|
||||
bool Live = false;
|
||||
for (; I != E; ++I) {
|
||||
const MachineInstr &MI = *I;
|
||||
|
||||
// Check defs of LR.
|
||||
if (MI.modifiesRegister(ARM::LR, &TRI))
|
||||
Live = false;
|
||||
|
||||
// Check uses of LR.
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
|
||||
Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
|
||||
Opcode == ARM::tBXNS_RET) {
|
||||
// These instructions use LR, but it's not an (explicit or implicit)
|
||||
// operand.
|
||||
Live = true;
|
||||
continue;
|
||||
}
|
||||
if (MI.readsRegister(ARM::LR, &TRI))
|
||||
Live = true;
|
||||
}
|
||||
return !Live;
|
||||
}
|
||||
|
||||
outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
|
||||
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
|
||||
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
|
||||
|
@ -5755,8 +5786,15 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
|
|||
|
||||
for (outliner::Candidate &C : RepeatedSequenceLocs) {
|
||||
C.initLRU(TRI);
|
||||
// Is LR available? If so, we don't need a save.
|
||||
if (C.LRU.available(ARM::LR)) {
|
||||
// LR liveness is overestimated in return blocks, unless they end with a
|
||||
// tail call.
|
||||
const auto Last = C.getMBB()->rbegin();
|
||||
const bool LRIsAvailable =
|
||||
C.getMBB()->isReturnBlock() && !Last->isCall()
|
||||
? isLRAvailable(TRI, Last,
|
||||
(MachineBasicBlock::reverse_iterator)C.front())
|
||||
: C.LRU.available(ARM::LR);
|
||||
if (LRIsAvailable) {
|
||||
FrameID = MachineOutlinerNoLRSave;
|
||||
NumBytesNoStackCalls += Costs.CallNoLRSave;
|
||||
C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
|
||||
|
@ -5867,7 +5905,13 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
|
|||
if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
|
||||
Flags |= MachineOutlinerMBBFlags::HasCalls;
|
||||
|
||||
if (!LRU.available(ARM::LR))
|
||||
// LR liveness is overestimated in return blocks.
|
||||
|
||||
bool LRIsAvailable =
|
||||
MBB.isReturnBlock() && !MBB.back().isCall()
|
||||
? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
|
||||
: LRU.available(ARM::LR);
|
||||
if (!LRIsAvailable)
|
||||
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
|
||||
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
; RUN: llc --verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv7m-unknown-unknown-eabi"
|
||||
|
||||
declare dso_local i32 @h0(i32, i32) local_unnamed_addr #1
|
||||
|
||||
define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%add = add nsw i32 %a, 1
|
||||
%sub = add nsw i32 %b, -1
|
||||
%call = tail call i32 @h0(i32 %add, i32 %sub) #0
|
||||
%add1 = add nsw i32 %c, %b
|
||||
%mul = shl nsw i32 %call, 1
|
||||
%add2 = add nsw i32 %mul, %add1
|
||||
%sub3 = sub nsw i32 %c, %d
|
||||
%mul4 = mul nsw i32 %add2, %sub3
|
||||
%sub5 = sub nsw i32 %call, %add1
|
||||
%div = sdiv i32 %mul4, %sub5
|
||||
%add6 = add nsw i32 %d, %c
|
||||
%mul7 = mul nsw i32 %div, %add6
|
||||
%add8 = add nsw i32 %mul7, 1
|
||||
ret i32 %add8
|
||||
}
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: bl h0
|
||||
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
|
||||
; CHECK-NEXT: adds r0, #1
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
|
||||
|
||||
define dso_local i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%sub = add nsw i32 %a, -1
|
||||
%add = add nsw i32 %b, 1
|
||||
%call = tail call i32 @h0(i32 %sub, i32 %add) #0
|
||||
%add1 = add nsw i32 %c, %b
|
||||
%mul = shl nsw i32 %call, 1
|
||||
%add2 = add nsw i32 %mul, %add1
|
||||
%sub3 = sub nsw i32 %c, %d
|
||||
%mul4 = mul nsw i32 %add2, %sub3
|
||||
%sub5 = sub nsw i32 %call, %add1
|
||||
%div = sdiv i32 %mul4, %sub5
|
||||
%add6 = add nsw i32 %d, %c
|
||||
%mul7 = mul nsw i32 %div, %add6
|
||||
%add8 = add nsw i32 %mul7, 2
|
||||
ret i32 %add8
|
||||
}
|
||||
; CHECK-LABEL: g:
|
||||
; CHECK: bl h0
|
||||
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
|
||||
; CHECK-NEXT: adds r0, #2
|
||||
; CHECK-NEXT: pop {r4, r5, r6, pc}
|
||||
|
||||
|
||||
attributes #0 = { minsize nounwind optsize }
|
||||
attributes #1 = { minsize optsize }
|
|
@ -0,0 +1,52 @@
|
|||
; RUN: llc -verify-machineinstrs %s -o - | FileCheck %s
|
||||
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv7m-unknown-unknown-eabi"
|
||||
|
||||
declare dso_local i32 @t(i32) local_unnamed_addr #0
|
||||
|
||||
define dso_local i32 @f(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = mul nsw i32 %a, 3
|
||||
%add = add nsw i32 %mul, 1
|
||||
%sub = add nsw i32 %b, -1
|
||||
%div = sdiv i32 %add, %sub
|
||||
%sub1 = sub nsw i32 %a, %c
|
||||
%div2 = sdiv i32 %div, %sub1
|
||||
%mul3 = mul nsw i32 %div2, %b
|
||||
%add4 = add nsw i32 %mul3, 1
|
||||
%call = tail call i32 @t(i32 %add4) #0
|
||||
ret i32 %call
|
||||
}
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: str lr, [sp, #-8]!
|
||||
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
|
||||
; CHECK-NEXT: ldr lr, [sp], #8
|
||||
; CHECK-NEXT: adds r0, #1
|
||||
; CHECK-NEXT: b t
|
||||
|
||||
define dso_local i32 @g(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = mul nsw i32 %a, 3
|
||||
%add = add nsw i32 %mul, 1
|
||||
%sub = add nsw i32 %b, -1
|
||||
%div = sdiv i32 %add, %sub
|
||||
%sub1 = sub nsw i32 %a, %c
|
||||
%div2 = sdiv i32 %div, %sub1
|
||||
%mul3 = mul nsw i32 %div2, %b
|
||||
%add4 = add nsw i32 %mul3, 3
|
||||
%call = tail call i32 @t(i32 %add4) #0
|
||||
ret i32 %call
|
||||
}
|
||||
|
||||
; CHECK-LABEL: g:
|
||||
; CHECK: str lr, [sp, #-8]!
|
||||
; CHECK-NEXT: bl OUTLINED_FUNCTION_0
|
||||
; CHECK-NEXT: ldr lr, [sp], #8
|
||||
; CHECK-NEXT: adds r0, #3
|
||||
; CHECK-NEXT: b t
|
||||
|
||||
; CHECK-LABEL: OUTLINED_FUNCTION_0:
|
||||
; CHECK-NOT: lr
|
||||
; CHECK: bx lr
|
||||
|
||||
attributes #0 = { minsize nounwind optsize }
|
Loading…
Reference in New Issue