forked from OSchip/llvm-project
Follow-up to r132458: omit the unnecessary stack copy when the x87 input is a
load. rdar://problem/6373334 llvm-svn: 132696
This commit is contained in:
parent
2378cacae3
commit
e0d3426e1a
|
@ -1097,6 +1097,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||
setTargetDAGCombine(ISD::SUB);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
if (Subtarget->is64Bit())
|
||||
setTargetDAGCombine(ISD::MUL);
|
||||
|
||||
|
@ -6700,11 +6701,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
|||
DebugLoc dl = Op.getDebugLoc();
|
||||
unsigned Size = SrcVT.getSizeInBits()/8;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
SDValue Addr = Op.getOperand(0);
|
||||
if (Addr.getOpcode() == ISD::LOAD)
|
||||
return BuildFILD(Op, SrcVT, DAG.getEntryNode(), Addr, DAG);
|
||||
|
||||
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
|
||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
|
||||
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
|
||||
|
@ -12169,6 +12165,26 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Target DAG combine for ISD::SINT_TO_FP nodes.
///
/// If the integer operand is produced directly by a load, and that load
/// passes every check below, the conversion is rebuilt through
/// X86TargetLowering::BuildFILD using the load's own input chain. The
/// load node itself is handed to BuildFILD in its StackSlot parameter.
/// NOTE(review): BuildFILD is not visible here; presumably it recognises a
/// load operand and reads from the load's address -- confirm there.
///
/// \param N    The SINT_TO_FP node being combined.
/// \param DAG  The selection DAG that owns \p N.
/// \param XTLI X86 lowering object; supplies the subtarget and BuildFILD.
/// \returns the value built by BuildFILD when the combine fires, otherwise
///          an empty SDValue.
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
                                        const X86TargetLowering *XTLI) {
  SDValue Op0 = N->getOperand(0);
  // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
  // a 32-bit target where SSE doesn't support i64->FP operations.
  if (Op0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
    EVT VT = Ld->getValueType(0);
    // Only fire for a plain (non-volatile, non-extending) load with a single
    // user, a scalar result, a non-64-bit subtarget, and a loaded type that
    // is not legal for the target.
    if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
        ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
        !XTLI->getSubtarget()->is64Bit() &&
        !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
      SDValue FILDChain =
          XTLI->BuildFILD(SDValue(N, 0), VT, Ld->getChain(), Op0, DAG);
      // Anything that used result #1 of the load (its output chain) is
      // rewired to result #1 of the new node, so ordering against other
      // chained operations is kept.
      DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
      return FILDChain;
    }
  }
  return SDValue();
}
|
||||
|
||||
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
|
||||
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
X86TargetLowering::DAGCombinerInfo &DCI) {
|
||||
|
@ -12253,6 +12269,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
|
||||
case X86ISD::FXOR:
|
||||
case X86ISD::FOR: return PerformFORCombine(N, DAG);
|
||||
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
|
||||
|
|
|
@ -689,6 +689,9 @@ namespace llvm {
|
|||
/// appropriate.
|
||||
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
|
||||
|
||||
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
protected:
|
||||
std::pair<const TargetRegisterClass*, uint8_t>
|
||||
findRepresentativeClass(EVT VT) const;
|
||||
|
@ -780,8 +783,6 @@ namespace llvm {
|
|||
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
; chainfail1: load an i64 from %a, convert it to float, store the float to %f,
; then compute x / y, sign-extend it to i64 and store that to %b. The float is
; also the return value. The FileCheck directives below must match in the
; order written: fildll, then fstps, then idivl.
define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
|
||||
entry:
|
||||
%tmp1 = load i64* %a, align 8
|
||||
; Ensure x87 ops are properly chained, order preserved.
|
||||
; CHECK: fildll
|
||||
%conv = sitofp i64 %tmp1 to float
|
||||
; CHECK: fstps
|
||||
store float %conv, float* %f, align 4
|
||||
; CHECK: idivl
|
||||
%div = sdiv i32 %x, %y
|
||||
%conv5 = sext i32 %div to i64
|
||||
store i64 %conv5, i64* %b, align 8
|
||||
ret float %conv
|
||||
}
|
||||
|
||||
; chainfail2: store an i64 zero to %b first, then load the i64 element of %a
; at index (y * x - 1), convert it to float, store the float to %f and return
; it. The FileCheck directives below expect a "movl $0," to be matched before
; the fildll.
; NOTE(review): presumably this guards against the load-based conversion being
; ordered ahead of the earlier store -- confirm against the original bug report.
define float @chainfail2(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
|
||||
entry:
|
||||
; CHECK: movl $0,
|
||||
store i64 0, i64* %b, align 8
|
||||
%mul = mul nsw i32 %y, %x
|
||||
%sub = add nsw i32 %mul, -1
|
||||
%idxprom = sext i32 %sub to i64
|
||||
%arrayidx = getelementptr inbounds i64* %a, i64 %idxprom
|
||||
%tmp4 = load i64* %arrayidx, align 8
|
||||
; CHECK: fildll
|
||||
%conv = sitofp i64 %tmp4 to float
|
||||
store float %conv, float* %f, align 4
|
||||
ret float %conv
|
||||
}
|
Loading…
Reference in New Issue