diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 2d15b738a31a..3c3657e1f567 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13804,3 +13804,37 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
 
   return SDValue();
 }
+
+bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+  // Duplicating returns is only worthwhile on the 64-bit SVR4 ABIs, and only
+  // for calls that are already marked as tail calls.
+  const bool Is64BitSVR4 = Subtarget.isSVR4ABI() && Subtarget.isPPC64();
+  if (!Is64BitSVR4 || !CI->isTailCall())
+    return false;
+
+  // Respect a caller that opts out through "disable-tail-calls"="true".
+  const Function *CallerFn = CI->getParent()->getParent();
+  if (CallerFn->getFnAttribute("disable-tail-calls").getValueAsString() ==
+      "true")
+    return false;
+
+  // When sibling-call optimization is disabled and tail calls are not
+  // guaranteed, duplicating the return cannot help.
+  const auto &Machine = getTargetMachine();
+  if (DisableSCO && !Machine.Options.GuaranteedTailCallOpt)
+    return false;
+
+  // Reject indirect calls (no statically known callee) and variadic callees.
+  const Function *CalleeFn = CI->getCalledFunction();
+  if (CalleeFn == nullptr || CalleeFn->isVarArg())
+    return false;
+
+  // The caller's and the call's calling conventions must be eligible for TCO.
+  const bool ConvsEligible = areCallingConvEligibleForTCO_64SVR4(
+      CallerFn->getCallingConv(), CI->getCallingConv());
+  if (!ConvsEligible)
+    return false;
+
+  // A callee assumed to be DSO-local has a good chance of being tail-called.
+  return Machine.shouldAssumeDSOLocal(*CallerFn->getParent(), CalleeFn);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index bf9c4b8e63b1..c75b95691d5a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1085,6 +1085,10 @@ namespace llvm {
     /// essentially v16i8 vector version of VINSERTH.
SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
 
+    /// Return whether the call instruction can potentially be optimized to a
+    /// tail call. This will cause the optimizers to attempt to move or
+    /// duplicate return instructions to help enable tail call optimizations.
+    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
   }; // end class PPCTargetLowering
 
   namespace PPC {
diff --git a/llvm/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll b/llvm/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll
new file mode 100644
index 000000000000..520efd8106f4
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/duplicate-returns-for-tailcall.ll
@@ -0,0 +1,63 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -stop-after codegenprepare -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -stop-after codegenprepare -mtriple=powerpc64-unknown-gnu-linux < %s | FileCheck %s
+
+; Function Attrs: noinline norecurse nounwind readnone
+define hidden signext i32 @call1(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #0 {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  ret i32 %add1
+}
+
+; Function Attrs: nounwind
+define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr #1 {
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %call = tail call signext i32 @call1(i32 signext %a, i32 signext %b, i32 signext %c)
+  br label %return
+; The return should get duplicated here to enable a tail-call opportunity.
+; CHECK-LABEL: if.then:
+; CHECK-NEXT: %[[T1:[a-zA-Z0-9]+]] = tail call signext i32 @call1
+; CHECK-NEXT: ret i32 %[[T1]]
+
+if.end:                                           ; preds = %entry
+  %cmp1 = icmp slt i32 %a, %b
+  br i1 %cmp1, label %if.then2, label %if.end4
+
+if.then2:                                         ; preds = %if.end
+  %call3 = tail call signext i32 @call2(i32 signext %a, i32 signext %b, i32 signext %c) #3
+  br label %return
+; No duplication here since we cannot tail-call an external function anyway.
+; CHECK-LABEL: if.then2:
+; CHECK-NEXT: tail call signext i32 @call2
+; CHECK-NEXT: br
+
+if.end4:                                          ; preds = %if.end
+  %cmp5 = icmp sgt i32 %b, %c
+  br i1 %cmp5, label %if.then6, label %return
+
+if.then6:                                         ; preds = %if.end4
+  %call7 = tail call fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c)
+  br label %return
+; No duplication here because the calling convention mismatch means we won't tail-call.
+; CHECK-LABEL: if.then6:
+; CHECK: tail call fastcc signext i32 @call3
+; CHECK-NEXT: br
+
+return:                                           ; preds = %if.end4, %if.then6, %if.then2, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %c, %if.end4 ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @call2(i32 signext, i32 signext, i32 signext) local_unnamed_addr #2
+
+; Function Attrs: noinline norecurse nounwind readnone
+define internal fastcc signext i32 @call3(i32 signext %a, i32 signext %b, i32 signext %c) unnamed_addr #0 {
+entry:
+  %mul = mul nsw i32 %b, %a
+  %mul1 = mul nsw i32 %mul, %c
+  ret i32 %mul1
+}