Followup to 132458, omit unnecessary stack copy when x87 input is a

load.  rdar://problem/6373334

llvm-svn: 132696
This commit is contained in:
Stuart Hastings 2011-06-06 23:15:58 +00:00
parent 2378cacae3
commit e0d3426e1a
3 changed files with 56 additions and 7 deletions

View File

@ -1097,6 +1097,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
@ -6700,11 +6701,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
SDValue Addr = Op.getOperand(0);
if (Addr.getOpcode() == ISD::LOAD)
return BuildFILD(Op, SrcVT, DAG.getEntryNode(), Addr, DAG);
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
@ -12169,6 +12165,26 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
DebugLoc dl = N->getDebugLoc();
SDValue Op0 = N->getOperand(0);
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT VT = Ld->getValueType(0);
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
}
return SDValue();
}
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@ -12253,6 +12269,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);

View File

@ -689,6 +689,9 @@ namespace llvm {
/// appropriate.
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(EVT VT) const;
@ -780,8 +783,6 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

View File

@ -0,0 +1,31 @@
; RUN: llc < %s -march=x86 | FileCheck %s
define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
entry:
%tmp1 = load i64* %a, align 8
; Insure x87 ops are properly chained, order preserved.
; CHECK: fildll
%conv = sitofp i64 %tmp1 to float
; CHECK: fstps
store float %conv, float* %f, align 4
; CHECK: idivl
%div = sdiv i32 %x, %y
%conv5 = sext i32 %div to i64
store i64 %conv5, i64* %b, align 8
ret float %conv
}
define float @chainfail2(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
entry:
; CHECK: movl $0,
store i64 0, i64* %b, align 8
%mul = mul nsw i32 %y, %x
%sub = add nsw i32 %mul, -1
%idxprom = sext i32 %sub to i64
%arrayidx = getelementptr inbounds i64* %a, i64 %idxprom
%tmp4 = load i64* %arrayidx, align 8
; CHECK: fildll
%conv = sitofp i64 %tmp4 to float
store float %conv, float* %f, align 4
ret float %conv
}