forked from OSchip/llvm-project
Enable optimization of sin / cos pair into call to __sincos_stret for iOS7+.
rdar://12856873 Patch by Evan Cheng, with a fix for rdar://13209539 by Tilmann Scheller llvm-svn: 193942
This commit is contained in:
parent
5615aca219
commit
e7dde0c061
|
@ -869,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||||
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
|
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Combine sin / cos into one node or libcall if possible.
|
||||||
|
if (Subtarget->hasSinCos()) {
|
||||||
|
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
|
||||||
|
setLibcallName(RTLIB::SINCOS_F64, "sincos");
|
||||||
|
if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
|
||||||
|
// For iOS, we don't want the normal expansion of a libcall to
|
||||||
|
// sincos. We want to issue a libcall to __sincos_stret.
|
||||||
|
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
|
||||||
|
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We have target-specific dag combine patterns for the following nodes:
|
// We have target-specific dag combine patterns for the following nodes:
|
||||||
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
|
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
|
||||||
|
@ -5950,6 +5962,70 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
|
||||||
Op.getOperand(1), Op.getOperand(2));
|
Op.getOperand(1), Op.getOperand(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Custom-lower an ISD::FSINCOS node into a call to the alternative entry
/// point __sincos_stret (f64) or __sincosf_stret (any other argument type).
///
/// The callee receives, in order, a pointer to a freshly created stack object
/// holding a { T, T } pair (flagged as sret) and the floating-point argument.
/// After the call the sine is loaded from the start of that object and the
/// cosine from ArgVT.getStoreSize() bytes past it.
///
/// \param Op  the FSINCOS node; operand 0 is the value whose sine and cosine
///            are requested.
/// \param DAG the selection DAG in which the replacement nodes are created.
/// \returns an ISD::MERGE_VALUES node yielding (sin, cos).
SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin());

  // For iOS, we want to call an alternative entry point: __sincos_stret,
  // whose return values are passed via sret.
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Pair of floats / doubles used to pass the result.
  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);

  // Create stack object for sret.
  const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
  const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
  int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
  SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());

  ArgListTy Args;

  // First argument: pointer to the result pair, flagged as the struct return.
  ArgListEntry SRetEntry;
  SRetEntry.Node = SRet;
  SRetEntry.Ty = RetTy->getPointerTo();
  SRetEntry.isSExt = false;
  SRetEntry.isZExt = false;
  SRetEntry.isSRet = true;
  Args.push_back(SRetEntry);

  // Second argument: the floating-point operand. A separate entry is used and
  // isSRet is cleared explicitly so that the sret flag set above is not
  // carried over onto the value argument.
  ArgListEntry ArgEntry;
  ArgEntry.Node = Arg;
  ArgEntry.Ty = ArgTy;
  ArgEntry.isSExt = false;
  ArgEntry.isZExt = false;
  ArgEntry.isSRet = false;
  Args.push_back(ArgEntry);

  const char *LibcallName = (ArgVT == MVT::f64)
  ? "__sincos_stret" : "__sincosf_stret";
  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());

  // The call itself returns void; the results come back through SRet.
  TargetLowering::
  CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
                       false, false, false, false, 0,
                       CallingConv::C, /*isTailCall=*/false,
                       /*doesNotRet=*/false, /*isReturnValueUsed*/false,
                       Callee, Args, DAG, dl);
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // Load the sin field, chained on the call's output chain.
  SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
                                MachinePointerInfo(), false, false, false, 0);

  // Address of cos field.
  SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
                            DAG.getIntPtrConstant(ArgVT.getStoreSize()));
  SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
                                MachinePointerInfo(), false, false, false, 0);

  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
                     LoadSin.getValue(0), LoadCos.getValue(0));
}
|
||||||
|
|
||||||
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
|
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
|
||||||
// Monotonic load/store is legal for all targets
|
// Monotonic load/store is legal for all targets
|
||||||
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
|
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
|
||||||
|
@ -6081,6 +6157,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||||
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
|
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
|
||||||
case ISD::ATOMIC_LOAD:
|
case ISD::ATOMIC_LOAD:
|
||||||
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
|
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
|
||||||
|
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
|
||||||
case ISD::SDIVREM:
|
case ISD::SDIVREM:
|
||||||
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
|
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
|
||||||
}
|
}
|
||||||
|
|
|
@ -448,6 +448,7 @@ namespace llvm {
|
||||||
const ARMSubtarget *ST) const;
|
const ARMSubtarget *ST) const;
|
||||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||||
const ARMSubtarget *ST) const;
|
const ARMSubtarget *ST) const;
|
||||||
|
/// Custom lowering for ISD::FSINCOS: emits a call to __sincos_stret or
/// __sincosf_stret and returns the (sin, cos) pair as merged values.
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
|
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
|
||||||
|
|
|
@ -281,6 +281,11 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
|
||||||
return SchedModel->MispredictPenalty;
|
return SchedModel->MispredictPenalty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ARMSubtarget::hasSinCos() const {
|
||||||
|
return getTargetTriple().getOS() == Triple::IOS &&
|
||||||
|
!getTargetTriple().isOSVersionLT(7, 0);
|
||||||
|
}
|
||||||
|
|
||||||
bool ARMSubtarget::enablePostRAScheduler(
|
bool ARMSubtarget::enablePostRAScheduler(
|
||||||
CodeGenOpt::Level OptLevel,
|
CodeGenOpt::Level OptLevel,
|
||||||
TargetSubtargetInfo::AntiDepBreakMode& Mode,
|
TargetSubtargetInfo::AntiDepBreakMode& Mode,
|
||||||
|
|
|
@ -330,6 +330,10 @@ public:
|
||||||
const std::string & getCPUString() const { return CPUString; }
|
const std::string & getCPUString() const { return CPUString; }
|
||||||
|
|
||||||
unsigned getMispredictionPenalty() const;
|
unsigned getMispredictionPenalty() const;
|
||||||
|
|
||||||
|
/// This function returns true if the target has sincos() routine in its
|
||||||
|
/// compiler runtime or math libraries.
|
||||||
|
bool hasSinCos() const;
|
||||||
|
|
||||||
/// enablePostRAScheduler - True at 'More' optimization.
|
/// enablePostRAScheduler - True at 'More' optimization.
|
||||||
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
|
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
; RUN: llc < %s -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT
|
||||||
|
; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS
|
||||||
|
|
||||||
|
; Combine sin / cos into a single call.
|
||||||
|
; rdar://12856873
|
||||||
|
|
||||||
|
; test1 - sinf and cosf are called on the same float argument and their
; results are added. With the SINCOS prefix (ios7 triple) the output must
; contain a call to ___sincosf_stret; with the NOOPT prefix (ios6 triple) it
; must contain separate calls to _sinf and _cosf.
define float @test1(float %x) nounwind {
|
||||||
|
entry:
|
||||||
|
; SINCOS-LABEL: test1:
|
||||||
|
; SINCOS: bl ___sincosf_stret
|
||||||
|

||||||
|
; NOOPT-LABEL: test1:
|
||||||
|
; NOOPT: bl _sinf
|
||||||
|
; NOOPT: bl _cosf
|
||||||
|
%call = tail call float @sinf(float %x) nounwind readnone
|
||||||
|
%call1 = tail call float @cosf(float %x) nounwind readnone
|
||||||
|
%add = fadd float %call, %call1
|
||||||
|
ret float %add
|
||||||
|
}
|
||||||
|
|
||||||
|
; test2 - double-precision variant: sin and cos are called on the same double
; argument and their results are added. With the SINCOS prefix (ios7 triple)
; the output must contain a call to ___sincos_stret; with the NOOPT prefix
; (ios6 triple) it must contain separate calls to _sin and _cos.
define double @test2(double %x) nounwind {
|
||||||
|
entry:
|
||||||
|
; SINCOS-LABEL: test2:
|
||||||
|
; SINCOS: bl ___sincos_stret
|
||||||
|

||||||
|
; NOOPT-LABEL: test2:
|
||||||
|
; NOOPT: bl _sin
|
||||||
|
; NOOPT: bl _cos
|
||||||
|
%call = tail call double @sin(double %x) nounwind readnone
|
||||||
|
%call1 = tail call double @cos(double %x) nounwind readnone
|
||||||
|
%add = fadd double %call, %call1
|
||||||
|
ret double %add
|
||||||
|
}
|
||||||
|
|
||||||
|
declare float @sinf(float) readonly
|
||||||
|
declare double @sin(double) readonly
|
||||||
|
declare float @cosf(float) readonly
|
||||||
|
declare double @cos(double) readonly
|
Loading…
Reference in New Issue