diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index b902b6056fca..1542934aa779 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1849,36 +1849,50 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, false, false, 0); } } else if (isa(Callee)) { - isDirect = true; - bool isDef = GV->isStrongDefinitionForLinker(); + // If we're optimizing for minimum size and the function has three or more + // instruction users in this block, we can improve codesize by calling + // indirectly as BLXr has a 16-bit encoding. + auto *GV = cast(Callee)->getGlobal(); + auto *BB = CLI.CS->getParent(); + bool PreferIndirect = + Subtarget->isThumb() && MF.getFunction()->optForMinSize() && + std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) { + return isa(U) && cast(U)->getParent() == BB; + }) > 2; - // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); - // tBX takes a register source operand. - if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); - Callee = DAG.getNode( - ARMISD::WrapperPIC, dl, PtrVt, - DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, true, 0); - } else if (Subtarget->isTargetCOFF()) { - assert(Subtarget->isTargetWindows() && - "Windows is the only supported COFF target"); - unsigned TargetFlags = GV->hasDLLImportStorageClass() - ? ARMII::MO_DLLIMPORT - : ARMII::MO_NO_FLAG; - Callee = - DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); - if (GV->hasDLLImportStorageClass()) + if (!PreferIndirect) { + isDirect = true; + bool isDef = GV->isStrongDefinitionForLinker(); + + // ARM call to a local ARM function is predicable. 
+ isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); + // tBX takes a register source operand. + if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); + Callee = DAG.getNode( + ARMISD::WrapperPIC, dl, PtrVt, + DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = - DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), - DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - false, false, false, 0); - } else { - Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); + false, false, true, 0); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); + } else { + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); + } } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; diff --git a/llvm/test/CodeGen/ARM/minsize-call-cse.ll b/llvm/test/CodeGen/ARM/minsize-call-cse.ll new file mode 100644 index 000000000000..072b76f03ba3 --- /dev/null +++ b/llvm/test/CodeGen/ARM/minsize-call-cse.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: f: +; CHECK: blx r +; CHECK: blx r +; CHECK: blx r +define void @f() minsize optsize { +entry: + call void @g(i32 45, i32 66) + call void @g(i32 88, i32 32) + call void @g(i32 55, i32 
33) + ret void +} + +; CHECK-LABEL: h: +; CHECK: bl g +; CHECK: bl g +define void @h() minsize optsize { +entry: + call void @g(i32 45, i32 66) + call void @g(i32 88, i32 32) + ret void +} + +declare void @g(i32,i32)