Enable optimization of sin / cos pair into call to __sincos_stret for iOS7+.

rdar://12856873 Patch by Evan Cheng, with a fix for rdar://13209539 by Tilmann Scheller llvm-svn: 193942
2013-11-03 06:14:38 +00:00 · 2013-11-03 06:14:38 +00:00 · e7dde0c061
parent 5615aca219
commit e7dde0c061
5 changed files with 125 additions and 0 deletions
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@ -869,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }
  // Combine sin / cos into one node or libcall if possible.
  if (Subtarget->hasSinCos()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
      // For iOS, we don't want the normal expansion of a libcall to
      // sincos. We want to issue a libcall to __sincos_stret instead.
      setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
      setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
    }
  }
  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
@ -5950,6 +5962,70 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
                     Op.getOperand(1), Op.getOperand(2));
 }
 /// LowerFSINCOS - Custom lowering for ISD::FSINCOS. Instead of the normal
 /// libcall expansion, emit a single call to the alternative entry point
 /// __sincos_stret (f64 operand) or __sincosf_stret (any other operand type),
 /// which hands back both results through an sret stack slot.
 ///
 /// \param Op  the FSINCOS node; operand 0 is the floating-point argument.
 /// \param DAG the selection DAG being lowered.
 /// \returns a MERGE_VALUES node whose value 0 is loaded from the start of the
 ///          sret slot (sin) and whose value 1 is loaded from the field that
 ///          follows it (cos).
 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() &&
         "__sincos_stret lowering is only expected on Darwin targets");
  // For iOS, we want to call an alternative entry point: __sincos_stret,
  // return values are passed via sret.
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Pair of floats / doubles used to pass the result.
  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
  // Create stack object for sret.
  const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
  const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
  SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
  ArgListTy Args;
  ArgListEntry Entry;
  // First argument: pointer to the result struct, flagged as sret.
  Entry.Node = SRet;
  Entry.Ty = RetTy->getPointerTo();
  Entry.isSExt = false;
  Entry.isZExt = false;
  Entry.isSRet = true;
  Args.push_back(Entry);
  // Second argument: the floating-point operand. Entry is reused from the
  // first argument, so the sret flag has to be cleared explicitly; otherwise
  // the operand would also be described as the struct-return pointer.
  Entry.Node = Arg;
  Entry.Ty = ArgTy;
  Entry.isSExt = false;
  Entry.isZExt = false;
  Entry.isSRet = false;
  Args.push_back(Entry);
  const char *LibcallName  = (ArgVT == MVT::f64)
  ? "__sincos_stret" : "__sincosf_stret";
  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
  // The call itself returns void; both results come back through SRet.
  TargetLowering::
  CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
                       false, false, false, false, 0,
                       CallingConv::C, /*isTailCall=*/false,
                       /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
                       Callee, Args, DAG, dl);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // Load the sin field; the load is chained on the call (CallResult.second)
  // so it cannot be scheduled before the call has written the slot.
  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
                                MachinePointerInfo(), false, false, false, 0);
  // Address of cos field.
  SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
                            DAG.getIntPtrConstant(ArgVT.getStoreSize()));
  // The cos load is chained after the sin load.
  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
                                MachinePointerInfo(), false, false, false, 0);
  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
                     LoadSin.getValue(0), LoadCos.getValue(0));
 }
 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  // Monotonic load/store is legal for all targets
  if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@ -6081,6 +6157,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
  case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
  case ISD::SDIVREM:
  case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
  }
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@ -448,6 +448,7 @@ namespace llvm {
                            const ARMSubtarget *ST) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                              const ARMSubtarget *ST) const;
    SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@ -281,6 +281,11 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
  return SchedModel->MispredictPenalty;
 }
 bool ARMSubtarget::hasSinCos() const {
  return getTargetTriple().getOS() == Triple::IOS &&
    !getTargetTriple().isOSVersionLT(7, 0);
 }
 bool ARMSubtarget::enablePostRAScheduler(
           CodeGenOpt::Level OptLevel,
           TargetSubtargetInfo::AntiDepBreakMode& Mode,
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@ -330,6 +330,10 @@ public:
  const std::string & getCPUString() const { return CPUString; }
  unsigned getMispredictionPenalty() const;
  /// This function returns true if the target has a sincos() routine in its
  /// compiler runtime or math libraries.
  bool hasSinCos() const;
  /// enablePostRAScheduler - True at 'More' optimization.
  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
--- a/llvm/test/CodeGen/ARM/sincos.ll
+++ b/llvm/test/CodeGen/ARM/sincos.ll
@ -0,0 +1,38 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT
 ; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS
 ; Combine sin / cos into a single call.
 ; rdar://12856873
 ; Single-precision case. sinf and cosf are called on the same argument and
 ; the two results are added. With the SINCOS prefix (the ios7 RUN line) one
 ; call to ___sincosf_stret is expected; with the NOOPT prefix (the ios6 RUN
 ; line) the separate calls to _sinf and _cosf are expected to remain.
 define float @test1(float %x) nounwind {
 entry:
 ; SINCOS-LABEL: test1:
 ; SINCOS: bl ___sincosf_stret
 ; NOOPT-LABEL: test1:
 ; NOOPT: bl _sinf
 ; NOOPT: bl _cosf
  %call = tail call float @sinf(float %x) nounwind readnone
  %call1 = tail call float @cosf(float %x) nounwind readnone
  %add = fadd float %call, %call1
  ret float %add
 }
 ; Double-precision case. sin and cos are called on the same argument and
 ; the two results are added. With the SINCOS prefix (the ios7 RUN line) one
 ; call to ___sincos_stret is expected; with the NOOPT prefix (the ios6 RUN
 ; line) the separate calls to _sin and _cos are expected to remain.
 define double @test2(double %x) nounwind {
 entry:
 ; SINCOS-LABEL: test2:
 ; SINCOS: bl ___sincos_stret
 ; NOOPT-LABEL: test2:
 ; NOOPT: bl _sin
 ; NOOPT: bl _cos
  %call = tail call double @sin(double %x) nounwind readnone
  %call1 = tail call double @cos(double %x) nounwind readnone
  %add = fadd double %call, %call1
  ret double %add
 }
 declare float  @sinf(float) readonly
 declare double @sin(double) readonly
 declare float @cosf(float) readonly
 declare double @cos(double) readonly