[ARM] Prefer indirect calls in minsize mode

... When we emit several calls to the same function in the same basic block.

An indirect call uses a "BLX r0" instruction which has a 16-bit encoding. If many calls are made to the same target, this can enable significant code size reductions.

llvm-svn: 275537
This commit is contained in:
James Molloy 2016-07-15 07:55:21 +00:00
parent a56f8f8e58
commit a454a11d60
2 changed files with 69 additions and 27 deletions

View File

@ -1849,36 +1849,50 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
false, false, 0);
}
} else if (isa<GlobalAddressSDNode>(Callee)) {
isDirect = true;
bool isDef = GV->isStrongDefinitionForLinker();
// If we're optimizing for minimum size and the function is called many
// times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
auto *BB = CLI.CS->getParent();
bool PreferIndirect =
Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
Callee =
DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags);
if (GV->hasDLLImportStorageClass())
if (!PreferIndirect) {
isDirect = true;
bool isDef = GV->isStrongDefinitionForLinker();
// ARM call to a local ARM function is predicable.
isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
// tBX takes a register source operand.
if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
} else {
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
unsigned TargetFlags = GV->hasDLLImportStorageClass()
? ARMII::MO_DLLIMPORT
: ARMII::MO_NO_FLAG;
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
TargetFlags);
if (GV->hasDLLImportStorageClass())
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
} else {
Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
}
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;

View File

@ -0,0 +1,28 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7m-arm-none-eabi"
; CHECK-LABEL: f:
; CHECK: blx r
; CHECK: blx r
; CHECK: blx r
define void @f() minsize optsize {
entry:
call void @g(i32 45, i32 66)
call void @g(i32 88, i32 32)
call void @g(i32 55, i32 33)
ret void
}
; CHECK-LABEL: h:
; CHECK: bl g
; CHECK: bl g
define void @h() minsize optsize {
entry:
call void @g(i32 45, i32 66)
call void @g(i32 88, i32 32)
ret void
}
declare void @g(i32,i32)