diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 204bdcc329c0..93df8cf69dc9 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4255,13 +4255,25 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, static bool isFunctionGlobalAddress(SDValue Callee); static bool -resideInSameSection(const Function *Caller, SDValue Callee, +callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM) { // If !G, Callee can be an external symbol. GlobalAddressSDNode *G = dyn_cast(Callee); if (!G) return false; + // The medium and large code models are expected to provide a sufficiently + // large TOC to provide all data addressing needs of a module with a + // single TOC. Since each module will be addressed with a single TOC then we + // only need to check that caller and callee don't cross dso boundaries. + if (CodeModel::Medium == TM.getCodeModel() || + CodeModel::Large == TM.getCodeModel()) + return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal()); + + // Otherwise we need to ensure callee and caller are in the same section, + // since the linker may allocate multiple TOCs, and we don't know which + // sections will belong to the same TOC base. + const GlobalValue *GV = G->getGlobal(); if (!GV->isStrongDefinitionForLinker()) return false; @@ -4410,11 +4422,10 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( !isa(Callee)) return false; - // Check if Callee resides in the same section, because for now, PPC64 SVR4 - // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another - // section. + // If the caller and callee potentially have different TOC bases then we + // cannot tail call since we need to restore the TOC pointer after the call. 
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 - if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine())) + if (!callsShareTOCBase(MF.getFunction(), Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. @@ -4996,7 +5007,7 @@ SDValue PPCTargetLowering::FinishCall( // any other variadic arguments). Ops.insert(std::next(Ops.begin()), AddTOC); } else if (CallOpc == PPCISD::CALL && - !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) { + !callsShareTOCBase(MF.getFunction(), Callee, DAG.getTarget())) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-blnop.ll b/llvm/test/CodeGen/PowerPC/ppc64-blnop.ll index 2fe23f91c83d..3b3d9add183e 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-blnop.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-blnop.ll @@ -5,6 +5,8 @@ ; RUN: llc < %s -function-sections -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-FS ; RUN: llc < %s -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s ; RUN: llc < %s -function-sections -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-FS +; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -code-model=small -mcpu=pwr8 | FileCheck %s -check-prefix=SCM %class.T = type { [2 x i8] } @@ -74,7 +76,11 @@ define void @wo_hcaller(%class.T* %this, i8* %c) { ; CHECK-LABEL: wo_hcaller: ; CHECK: bl wo_hcallee -; CHECK-NEXT: nop +; CHECK-NOT: nop + +; SCM-LABEL: wo_hcaller: +; SCM: bl wo_hcallee +; SCM-NEXT: nop } define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void } @@ -84,7 +90,11 @@ define void @wo_pcaller(%class.T* %this, i8* %c) { ; CHECK-LABEL: wo_pcaller: ; CHECK: bl wo_pcallee -; CHECK-NEXT: nop +; CHECK-NOT: nop + +; SCM-LABEL: wo_pcaller: +; SCM: bl wo_pcallee 
+; SCM-NEXT: nop } define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void } @@ -104,7 +114,11 @@ define void @w_pcaller(i8* %ptr) { ; CHECK-LABEL: w_pcaller: ; CHECK: bl w_pcallee -; CHECK-NEXT: nop +; CHECK-NOT: nop + +; SCM-LABEL: w_pcaller: +; SCM: bl w_pcallee +; SCM-NEXT: nop } define weak hidden void @w_hcallee(i8* %ptr) { ret void } @@ -114,7 +128,11 @@ define void @w_hcaller(i8* %ptr) { ; CHECK-LABEL: w_hcaller: ; CHECK: bl w_hcallee -; CHECK-NEXT: nop +; CHECK-NOT: nop + +; SCM-LABEL: w_hcaller: +; SCM: bl w_hcallee +; SCM-NEXT: nop } define weak void @w_callee(i8* %ptr) { ret void } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-calls.ll b/llvm/test/CodeGen/PowerPC/ppc64-calls.ll index d4831ffa7a43..245056f7add9 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-calls.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-calls.ll @@ -1,4 +1,6 @@ ; RUN: llc -relocation-model=static -verify-machineinstrs < %s -mcpu=pwr7 | FileCheck %s +; RUN: llc -relocation-model=static -verify-machineinstrs < %s -code-model=small -mcpu=pwr7 | FileCheck %s -check-prefix=SCM + target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -21,13 +23,22 @@ define void @test_direct() nounwind readnone { ret void } -; Calls to weak function requires a TOC restore 'nop' because they -; may be overridden in a different module. +; Calls to weak functions require a TOC restore 'nop' with the small code model +; because the definition that gets chosen at link time may come from a +; different section even though we have seen a weak definition in the same +; section at compile time. +; With the large and medium code models no TOC restore is needed, since we know +; that whichever definition is chosen resides within the same DSO boundaries and +; therefore shares the same TOC. 
define void @test_weak() nounwind readnone { -; CHECK-LABEL: test_weak: tail call void @foo_weak() nounwind -; CHECK: bl foo -; CHECK-NEXT: nop +; CHECK-LABEL: test_weak: +; CHECK: b foo_weak +; CHECK-NOT: nop + +; SCM-LABEL: test_weak: +; SCM: bl foo_weak +; SCM-NEXT: nop ret void } diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall.ll index 59e545601475..3c08ecb5119f 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s -check-prefix=CHECK-SCO ; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX ; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-SCO-HASQPX +; RUN: llc < %s -relocation-model=static -O1 -disable-ppc-sco=false -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -code-model=small | FileCheck %s -check-prefix=SCM ; No combination of "powerpc64le-unknown-linux-gnu" + "CHECK-SCO", because ; only Power8 (and later) fully support LE. 
@@ -142,7 +143,10 @@ define void @wo_hcaller(%class.T* %this, i8* %c) { ret void ; CHECK-SCO-LABEL: wo_hcaller: -; CHECK-SCO: bl wo_hcallee +; CHECK-SCO: b wo_hcallee + +; SCM-LABEL: wo_hcaller: +; SCM: bl wo_hcallee } define weak_odr protected void @wo_pcallee(%class.T* %this, i8* %c) { ret void } @@ -151,7 +155,10 @@ define void @wo_pcaller(%class.T* %this, i8* %c) { ret void ; CHECK-SCO-LABEL: wo_pcaller: -; CHECK-SCO: bl wo_pcallee +; CHECK-SCO: b wo_pcallee + +; SCM-LABEL: wo_pcaller: +; SCM: bl wo_pcallee } define weak_odr void @wo_callee(%class.T* %this, i8* %c) { ret void } @@ -160,7 +167,10 @@ define void @wo_caller(%class.T* %this, i8* %c) { ret void ; CHECK-SCO-LABEL: wo_caller: -; CHECK-SCO: bl wo_callee +; CHECK-SCO: b wo_callee + +; SCM-LABEL: wo_caller: +; SCM: bl wo_callee } define weak protected void @w_pcallee(i8* %ptr) { ret void } @@ -169,7 +179,10 @@ define void @w_pcaller(i8* %ptr) { ret void ; CHECK-SCO-LABEL: w_pcaller: -; CHECK-SCO: bl w_pcallee +; CHECK-SCO: b w_pcallee + +; SCM-LABEL: w_pcaller: +; SCM: bl w_pcallee } define weak hidden void @w_hcallee(i8* %ptr) { ret void } @@ -178,7 +191,10 @@ define void @w_hcaller(i8* %ptr) { ret void ; CHECK-SCO-LABEL: w_hcaller: -; CHECK-SCO: bl w_hcallee +; CHECK-SCO: b w_hcallee + +; SCM-LABEL: w_hcaller: +; SCM: bl w_hcallee } define weak void @w_callee(i8* %ptr) { ret void } @@ -187,7 +203,10 @@ define void @w_caller(i8* %ptr) { ret void ; CHECK-SCO-LABEL: w_caller: -; CHECK-SCO: bl w_callee +; CHECK-SCO: b w_callee + +; SCM-LABEL: w_caller: +; SCM: bl w_callee } %struct.byvalTest = type { [8 x i8] }