forked from OSchip/llvm-project
[ARM] Prefer indirect calls in minsize mode
... when we emit several calls to the same function in the same basic block. An indirect call uses a "BLX r0" instruction, which has a 16-bit encoding. If many calls are made to the same target, this can enable significant code size reductions. llvm-svn: 275537
This commit is contained in:
parent
a56f8f8e58
commit
a454a11d60
|
@ -1849,36 +1849,50 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
false, false, 0);
|
||||
}
|
||||
} else if (isa<GlobalAddressSDNode>(Callee)) {
|
||||
isDirect = true;
|
||||
bool isDef = GV->isStrongDefinitionForLinker();
|
||||
// If we're optimizing for minimum size and the function is called many
|
||||
// times in this block, we can improve codesize by calling indirectly
|
||||
// as BLXr has a 16-bit encoding.
|
||||
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
|
||||
auto *BB = CLI.CS->getParent();
|
||||
bool PreferIndirect =
|
||||
Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
|
||||
std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) {
|
||||
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
|
||||
}) > 2;
|
||||
|
||||
// ARM call to a local ARM function is predicable.
|
||||
isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
|
||||
// tBX takes a register source operand.
|
||||
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
|
||||
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
|
||||
Callee = DAG.getNode(
|
||||
ARMISD::WrapperPIC, dl, PtrVt,
|
||||
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
|
||||
Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
|
||||
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
|
||||
false, false, true, 0);
|
||||
} else if (Subtarget->isTargetCOFF()) {
|
||||
assert(Subtarget->isTargetWindows() &&
|
||||
"Windows is the only supported COFF target");
|
||||
unsigned TargetFlags = GV->hasDLLImportStorageClass()
|
||||
? ARMII::MO_DLLIMPORT
|
||||
: ARMII::MO_NO_FLAG;
|
||||
Callee =
|
||||
DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
|
||||
if (GV->hasDLLImportStorageClass())
|
||||
if (!PreferIndirect) {
|
||||
isDirect = true;
|
||||
bool isDef = GV->isStrongDefinitionForLinker();
|
||||
|
||||
// ARM call to a local ARM function is predicable.
|
||||
isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
|
||||
// tBX takes a register source operand.
|
||||
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
|
||||
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
|
||||
Callee = DAG.getNode(
|
||||
ARMISD::WrapperPIC, dl, PtrVt,
|
||||
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
|
||||
Callee =
|
||||
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
|
||||
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
|
||||
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
|
||||
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
|
||||
false, false, false, 0);
|
||||
} else {
|
||||
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
|
||||
false, false, true, 0);
|
||||
} else if (Subtarget->isTargetCOFF()) {
|
||||
assert(Subtarget->isTargetWindows() &&
|
||||
"Windows is the only supported COFF target");
|
||||
unsigned TargetFlags = GV->hasDLLImportStorageClass()
|
||||
? ARMII::MO_DLLIMPORT
|
||||
: ARMII::MO_NO_FLAG;
|
||||
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
|
||||
TargetFlags);
|
||||
if (GV->hasDLLImportStorageClass())
|
||||
Callee =
|
||||
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
|
||||
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
|
||||
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
|
||||
false, false, false, 0);
|
||||
} else {
|
||||
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
|
||||
}
|
||||
}
|
||||
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
|
||||
isDirect = true;
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "thumbv7m-arm-none-eabi"
|
||||
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: blx r
|
||||
; CHECK: blx r
|
||||
; CHECK: blx r
|
||||
; Positive case: a minsize function whose single basic block calls @g three
; times. That is more than two uses of the callee in the block, so the
; FileCheck patterns preceding this function expect every call to be emitted
; as a register-indirect `blx`.
define void @f() minsize optsize {
|
||||
entry:
|
||||
call void @g(i32 45, i32 66)
|
||||
call void @g(i32 88, i32 32)
|
||||
call void @g(i32 55, i32 33)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: h:
|
||||
; CHECK: bl g
|
||||
; CHECK: bl g
|
||||
; Negative case: a minsize function whose single basic block calls @g only
; twice. Two uses do not exceed the "more than two" threshold, so the
; FileCheck patterns preceding this function expect both calls to remain
; direct `bl g` instructions.
define void @h() minsize optsize {
|
||||
entry:
|
||||
call void @g(i32 45, i32 66)
|
||||
call void @g(i32 88, i32 32)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @g(i32,i32)
|
Loading…
Reference in New Issue