From e36c400255fcb3ef3def139a8a8d01f1ce875fb6 Mon Sep 17 00:00:00 2001
From: Dale Johannesen <dalej@apple.com>
Date: Sun, 23 Sep 2007 14:52:20 +0000
Subject: [PATCH] Fix PR 1681.  When X86 target uses +sse -sse2, keep f32 in
 SSE registers and f64 in x87.  This is effectively a new codegen mode.
 Change addLegalFPImmediate to permit float and double variants to do
 different things.  Adjust callers.

llvm-svn: 42246
---
 llvm/include/llvm/Target/TargetLowering.h   |  10 --
 llvm/lib/Target/Alpha/AlphaISelLowering.cpp |   2 +
 llvm/lib/Target/IA64/IA64ISelLowering.cpp   |   2 +
 llvm/lib/Target/X86/X86ISelLowering.cpp     |  99 +++++++++++---
 llvm/lib/Target/X86/X86ISelLowering.h       |   8 +-
 llvm/lib/Target/X86/X86InstrFPStack.td      | 135 ++++++++++----------
 llvm/lib/Target/X86/X86InstrInfo.td         |   3 +-
 llvm/lib/Target/X86/X86InstrSSE.td          |   2 +-
 8 files changed, 164 insertions(+), 97 deletions(-)

diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index 88b382327e6a..8586d7f09162 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -783,17 +783,7 @@ protected:
   /// addLegalFPImmediate - Indicate that this target can instruction select
   /// the specified FP immediate natively.
   void addLegalFPImmediate(const APFloat& Imm) {
-    // Incoming constants are expected to be double.  We also add
-    // the float version.  It is expected that all constants are exactly
-    // representable as floats.
-    assert(&Imm.getSemantics() == &APFloat::IEEEdouble);
-    APFloat Immf = APFloat(Imm);
-    // Rounding mode is not supposed to matter here...
-    if (Immf.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven) !=
-        APFloat::opOK)
-      assert(0);
     LegalFPImmediates.push_back(Imm);
-    LegalFPImmediates.push_back(Immf);
   }
 
   /// setTargetDAGCombine - Targets should invoke this method for each target
diff --git a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
index a3654af114f6..45a271d0ef8a 100644
--- a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -140,7 +140,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
   setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
   setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
   addLegalFPImmediate(APFloat(+0.0)); //F31
+  addLegalFPImmediate(APFloat(+0.0f)); //F31
   addLegalFPImmediate(APFloat(-0.0)); //-F31
+  addLegalFPImmediate(APFloat(-0.0f)); //-F31
 
   setJumpBufSize(272);
   setJumpBufAlignment(16);
diff --git a/llvm/lib/Target/IA64/IA64ISelLowering.cpp b/llvm/lib/Target/IA64/IA64ISelLowering.cpp
index f1bd5ba86f42..0bbda9add6ba 100644
--- a/llvm/lib/Target/IA64/IA64ISelLowering.cpp
+++ b/llvm/lib/Target/IA64/IA64ISelLowering.cpp
@@ -120,7 +120,9 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
   setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
 
   addLegalFPImmediate(APFloat(+0.0));
+  addLegalFPImmediate(APFloat(+0.0f));
   addLegalFPImmediate(APFloat(+1.0));
+  addLegalFPImmediate(APFloat(+1.0f));
 }
 
 const char *IA64TargetLowering::getTargetNodeName(unsigned Opcode) const {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e79ee90eae89..e52b6531faf4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40,7 +40,8 @@ using namespace llvm;
 X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   : TargetLowering(TM) {
   Subtarget = &TM.getSubtarget<X86Subtarget>();
-  X86ScalarSSE = Subtarget->hasSSE2();
+  X86ScalarSSEf64 = Subtarget->hasSSE2();
+  X86ScalarSSEf32 = Subtarget->hasSSE1();
   X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
 
   RegInfo = TM.getRegisterInfo();
@@ -87,7 +88,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
   } else {
-    if (X86ScalarSSE)
+    if (X86ScalarSSEf64)
       // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
     else
@@ -99,7 +100,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
   // SSE has no i16 to fp conversion, only i32
-  if (X86ScalarSSE) {
+  if (X86ScalarSSEf32) {
     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
     // f32 and f64 cases are Legal, f80 case is not
     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
@@ -118,7 +119,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
 
   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
-  if (X86ScalarSSE) {
+  if (X86ScalarSSEf32) {
     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
     // f32 and f64 cases are Legal, f80 case is not
     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
@@ -137,7 +138,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
     setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
   } else {
-    if (X86ScalarSSE && !Subtarget->hasSSE3())
+    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
       // Expand FP_TO_UINT into a select.
       // FIXME: We would like to use a Custom expander here eventually to do
       // the optimal thing for SSE vs. the default expansion in the legalizer.
@@ -148,7 +149,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   }
 
   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
-  if (!X86ScalarSSE) {
+  if (!X86ScalarSSEf64) {
     setOperationAction(ISD::BIT_CONVERT    , MVT::f32  , Expand);
     setOperationAction(ISD::BIT_CONVERT    , MVT::i32  , Expand);
   }
@@ -271,7 +272,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   else
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 
-  if (X86ScalarSSE) {
+  if (X86ScalarSSEf64) {
+    // f32 and f64 use SSE.
     // Set up the FP register classes.
     addRegisterClass(MVT::f32, X86::FR32RegisterClass);
     addRegisterClass(MVT::f64, X86::FR64RegisterClass);
@@ -300,7 +302,8 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     // cases we handle.
     setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
     setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
-    addLegalFPImmediate(APFloat(+0.0)); // xorps / xorpd
+    addLegalFPImmediate(APFloat(+0.0)); // xorpd
+    addLegalFPImmediate(APFloat(+0.0f)); // xorps
 
     // Conversions to long double (in X87) go through memory.
     setConvertAction(MVT::f32, MVT::f80, Expand);
@@ -309,7 +312,55 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     // Conversions from long double (in X87) go through memory.
     setConvertAction(MVT::f80, MVT::f32, Expand);
     setConvertAction(MVT::f80, MVT::f64, Expand);
+  } else if (X86ScalarSSEf32) {
+    // Use SSE for f32, x87 for f64.
+    // Set up the FP register classes.
+    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
+    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+
+    // Use ANDPS to simulate FABS.
+    setOperationAction(ISD::FABS , MVT::f32, Custom);
+
+    // Use XORP to simulate FNEG.
+    setOperationAction(ISD::FNEG , MVT::f32, Custom);
+
+    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+
+    // Use ANDPS and ORPS to simulate FCOPYSIGN.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+    // We don't support sin/cos/fmod
+    setOperationAction(ISD::FSIN , MVT::f32, Expand);
+    setOperationAction(ISD::FCOS , MVT::f32, Expand);
+    setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+    // Expand FP immediates into loads from the stack, except for the special
+    // cases we handle.
+    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
+    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+    addLegalFPImmediate(APFloat(+0.0f)); // xorps
+    addLegalFPImmediate(APFloat(+0.0)); // FLD0
+    addLegalFPImmediate(APFloat(+1.0)); // FLD1
+    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
+    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+
+    // SSE->x87 conversions go through memory.
+    setConvertAction(MVT::f32, MVT::f64, Expand);
+    setConvertAction(MVT::f32, MVT::f80, Expand);
+
+    // x87->SSE truncations need to go through memory.
+    setConvertAction(MVT::f80, MVT::f32, Expand);
+    setConvertAction(MVT::f64, MVT::f32, Expand);
+    // And x87->x87 truncations also.
+    setConvertAction(MVT::f80, MVT::f64, Expand);
+
+    if (!UnsafeFPMath) {
+      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
+      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
+    }
+
   } else {
+    // f32 and f64 in x87.
     // Set up the FP register classes.
     addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
     addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
@@ -335,6 +386,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     addLegalFPImmediate(APFloat(+1.0)); // FLD1
     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
+    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
+    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
+    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
+    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
   }
 
   // Long double always uses X87.
@@ -583,7 +638,8 @@ SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
 
   // If this is an FP return with ScalarSSE, we need to move the value from
   // an XMM register onto the fp-stack.
-  if (X86ScalarSSE) {
+  if ((X86ScalarSSEf32 && RVLocs[0].getValVT()==MVT::f32) ||
+      (X86ScalarSSEf64 && RVLocs[0].getValVT()==MVT::f64)) {
     SDOperand MemLoc;
 
     // If this is a load into a scalarsse value, don't store the loaded value
@@ -659,7 +715,8 @@ LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
 
   // If we are using ScalarSSE, store ST(0) to the stack and reload it into
   // an XMM register.
-  if (X86ScalarSSE) {
+  if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
+      (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
     // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
     // shouldn't be necessary except that RFP cannot be live across
     // multiple blocks. When stackifier is fixed, they can be uncoupled.
@@ -3334,7 +3391,9 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
                                  StackSlot, NULL, 0);
 
   // These are really Legal; caller falls through into that case.
-  if (SrcVT==MVT::i32 && Op.getValueType() != MVT::f80 && X86ScalarSSE)
+  if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f32 && X86ScalarSSEf32)
+    return Result;
+  if (SrcVT==MVT::i32 && Op.getValueType() == MVT::f64 && X86ScalarSSEf64)
     return Result;
   if (SrcVT==MVT::i64 && Op.getValueType() != MVT::f80 && Subtarget->is64Bit())
     return Result;
@@ -3342,7 +3401,8 @@ SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
 
   // Build the FILD
   SDVTList Tys;
-  bool useSSE = X86ScalarSSE && Op.getValueType() != MVT::f80;
+  bool useSSE = (X86ScalarSSEf32 && Op.getValueType() == MVT::f32) ||
+                (X86ScalarSSEf64 && Op.getValueType() == MVT::f64);
   if (useSSE)
     Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
   else
@@ -3390,8 +3450,11 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
   SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
 
   // These are really Legal.
-  if (Op.getValueType() == MVT::i32 && X86ScalarSSE &&
-      Op.getOperand(0).getValueType() != MVT::f80)
+  if (Op.getValueType() == MVT::i32 &&
+      X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32)
+    return Result;
+  if (Op.getValueType() == MVT::i32 &&
+      X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)
     return Result;
   if (Subtarget->is64Bit() &&
       Op.getValueType() == MVT::i64 &&
@@ -3408,7 +3471,8 @@ SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
 
   SDOperand Chain = DAG.getEntryNode();
   SDOperand Value = Op.getOperand(0);
-  if (X86ScalarSSE && Op.getOperand(0).getValueType() != MVT::f80) {
+  if ((X86ScalarSSEf32 && Op.getOperand(0).getValueType() == MVT::f32) ||
+      (X86ScalarSSEf64 && Op.getOperand(0).getValueType() == MVT::f64)) {
     assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
     Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
     SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
@@ -3620,8 +3684,9 @@ SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
     // pressure reason)?
     SDOperand Cmp = Cond.getOperand(1);
     unsigned Opc = Cmp.getOpcode();
-    bool IllegalFPCMov = !X86ScalarSSE &&
-      MVT::isFloatingPoint(Op.getValueType()) &&
+    bool IllegalFPCMov =
+      ! ((X86ScalarSSEf32 && Op.getValueType()==MVT::f32) ||
+         (X86ScalarSSEf64 && Op.getValueType()==MVT::f64)) &&
       !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
     if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
         !IllegalFPCMov) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 95f95e2c8375..658b34449d6a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -373,8 +373,12 @@ namespace llvm {
     /// X86StackPtr - X86 physical register used as stack ptr.
     unsigned X86StackPtr;
 
-    /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
-    bool X86ScalarSSE;
+    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
+    /// floating point ops.
+    /// When SSE is available, use it for f32 operations.
+    /// When SSE2 is available, use it for f64 operations.
+    bool X86ScalarSSEf32;
+    bool X86ScalarSSEf64;
 
     SDNode *LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode*TheCall,
                             unsigned CallingConv, SelectionDAG &DAG);
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index c4a4e4c8dad0..1e8cf6a5b4d0 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -152,30 +152,33 @@ def FpSETRESULT80 : FpI_<(outs), (ins RFP80:$src), SpecialFP,
                          [(X86fpset RFP80:$src)]>;// ST(0) = FPR
 }
 
-// FpI - Floating Point Psuedo Instruction template. Predicated on FPStack.
-// Note that f80-only instructions are used even in SSE mode and use FpI_
-// not this predicate.
-class FpI<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
-  FpI_<outs, ins, fp, pattern>, Requires<[FPStack]>;
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
+// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
+// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
+// f80 instructions cannot use SSE and use neither of these.
+class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
+class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
 
 // Register copies.  Just copies, the shortening ones do not truncate.
-def MOV_Fp3232  : FpI<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
-def MOV_Fp3264  : FpI<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
-def MOV_Fp6432  : FpI<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
-def MOV_Fp6464  : FpI<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
-def MOV_Fp8032  : FpI<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
-def MOV_Fp3280  : FpI<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
-def MOV_Fp8064  : FpI<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
-def MOV_Fp6480  : FpI<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
+def MOV_Fp3232  : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
+def MOV_Fp3264  : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
+def MOV_Fp6432  : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
+def MOV_Fp6464  : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
+def MOV_Fp8032  : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
+def MOV_Fp3280  : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
+def MOV_Fp8064  : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
+def MOV_Fp6480  : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
 def MOV_Fp8080  : FpI_<(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
 
 // Factoring for arithmetic.
 multiclass FPBinary_rr<SDNode OpNode> {
 // Register op register -> register
 // These are separated out because they have no reversed form.
-def _Fp32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
+def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
                 [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
-def _Fp64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
+def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
                 [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
 def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
                 [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
@@ -185,13 +188,13 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
 // These instructions cannot address 80-bit memory.
 multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
 // ST(0) = ST(0) + [mem]
-def _Fp32m : FpI<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
+def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
                 [(set RFP32:$dst, (OpNode RFP32:$src1,
                                    (loadf32 addr:$src2)))]>;
-def _Fp64m : FpI<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
+def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
                 [(set RFP64:$dst, (OpNode RFP64:$src1,
                                    (loadf64 addr:$src2)))]>;
-def _Fp64m32: FpI<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
+def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
                 [(set RFP64:$dst, (OpNode RFP64:$src1,
                                    (f64 (extloadf32 addr:$src2))))]>;
 def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
@@ -205,16 +208,16 @@ def _F32m  : FPI<0xD8, fp, (outs), (ins f32mem:$src),
 def _F64m  : FPI<0xDC, fp, (outs), (ins f64mem:$src),
                  !strconcat("f", !strconcat(asmstring, "{l}\t$src"))>;
 // ST(0) = ST(0) + [memint]
-def _FpI16m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW,
                 [(set RFP32:$dst, (OpNode RFP32:$src1,
                                    (X86fild addr:$src2, i16)))]>;
-def _FpI32m32 : FpI<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW,
                 [(set RFP32:$dst, (OpNode RFP32:$src1,
                                    (X86fild addr:$src2, i32)))]>;
-def _FpI16m64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW,
                 [(set RFP64:$dst, (OpNode RFP64:$src1,
                                    (X86fild addr:$src2, i16)))]>;
-def _FpI32m64 : FpI<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW,
                 [(set RFP64:$dst, (OpNode RFP64:$src1,
                                    (X86fild addr:$src2, i32)))]>;
 def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), OneArgFPRW,
@@ -271,9 +274,9 @@ def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
 
 // Unary operations.
 multiclass FPUnary<SDNode OpNode, bits<8> opcode, string asmstring> {
-def _Fp32  : FpI<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
+def _Fp32  : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
                  [(set RFP32:$dst, (OpNode RFP32:$src))]>;
-def _Fp64  : FpI<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
+def _Fp64  : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
                  [(set RFP64:$dst, (OpNode RFP64:$src))]>;
 def _Fp80  : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
                  [(set RFP80:$dst, (OpNode RFP80:$src))]>;
@@ -286,9 +289,9 @@ defm SQRT: FPUnary<fsqrt,0xFA, "fsqrt">;
 defm SIN : FPUnary<fsin, 0xFE, "fsin">;
 defm COS : FPUnary<fcos, 0xFF, "fcos">;
 
-def TST_Fp32  : FpI<(outs), (ins RFP32:$src), OneArgFP,
+def TST_Fp32  : FpIf32<(outs), (ins RFP32:$src), OneArgFP,
                 []>;
-def TST_Fp64  : FpI<(outs), (ins RFP64:$src), OneArgFP,
+def TST_Fp64  : FpIf64<(outs), (ins RFP64:$src), OneArgFP,
                 []>;
 def TST_Fp80  : FpI_<(outs), (ins RFP80:$src), OneArgFP,
                 []>;
@@ -296,10 +299,10 @@ def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9;
 
 // Floating point cmovs.
 multiclass FPCMov<CondCode cc> {
-  def _Fp32  : FpI<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP,
+  def _Fp32  : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP,
                    [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, cc))]>;
-  def _Fp64  : FpI<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP,
+  def _Fp64  : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP,
                    [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, cc))]>;
   def _Fp80  : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), CondMovFP,
@@ -337,30 +340,30 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
 
 // Floating point loads & stores.
 let isLoad = 1 in {
-def LD_Fp32m   : FpI<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
+def LD_Fp32m   : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
                   [(set RFP32:$dst, (loadf32 addr:$src))]>;
-def LD_Fp64m   : FpI<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
+def LD_Fp64m   : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
                   [(set RFP64:$dst, (loadf64 addr:$src))]>;
 def LD_Fp80m   : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (loadf80 addr:$src))]>;
 }
-def LD_Fp32m64 : FpI<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
+def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
                   [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
 def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
 def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
-def ILD_Fp16m32: FpI<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
+def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
                   [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
-def ILD_Fp32m32: FpI<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
+def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
                   [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
-def ILD_Fp64m32: FpI<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
+def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
                   [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
-def ILD_Fp16m64: FpI<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
+def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
                   [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
-def ILD_Fp32m64: FpI<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
+def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
                   [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
-def ILD_Fp64m64: FpI<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
+def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
                   [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
 def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
@@ -369,11 +372,11 @@ def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
 def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
                   [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
 
-def ST_Fp32m   : FpI<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+def ST_Fp32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
                   [(store RFP32:$src, addr:$op)]>;
-def ST_Fp64m32 : FpI<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
                   [(truncstoref32 RFP64:$src, addr:$op)]>;
-def ST_Fp64m   : FpI<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
+def ST_Fp64m   : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
                   [(store RFP64:$src, addr:$op)]>;
 def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
                   [(truncstoref32 RFP80:$src, addr:$op)]>;
@@ -381,19 +384,19 @@ def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
                   [(truncstoref64 RFP80:$src, addr:$op)]>;
 // FST does not support 80-bit memory target; FSTP must be used.
-def ST_FpP32m   : FpI<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
-def ST_FpP64m32 : FpI<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
-def ST_FpP64m   : FpI<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
-def ST_FpP80m32 : FpI<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
-def ST_FpP80m64 : FpI<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
+def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP64m   : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
+def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
+def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
 def ST_FpP80m   : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
                   [(store RFP80:$src, addr:$op)]>;
-def IST_Fp16m32 : FpI<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp32m32 : FpI<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp64m32 : FpI<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp16m64 : FpI<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp32m64 : FpI<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp64m64 : FpI<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
 def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
 def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
@@ -456,13 +459,13 @@ def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9;
 
 // Floating point constant loads.
 let isReMaterializable = 1 in {
-def LD_Fp032 : FpI<(outs RFP32:$dst), (ins), ZeroArgFP,
+def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
                 [(set RFP32:$dst, fpimm0)]>;
-def LD_Fp132 : FpI<(outs RFP32:$dst), (ins), ZeroArgFP,
+def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
                 [(set RFP32:$dst, fpimm1)]>;
-def LD_Fp064 : FpI<(outs RFP64:$dst), (ins), ZeroArgFP,
+def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
                 [(set RFP64:$dst, fpimm0)]>;
-def LD_Fp164 : FpI<(outs RFP64:$dst), (ins), ZeroArgFP,
+def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
                 [(set RFP64:$dst, fpimm1)]>;
 def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
                 [(set RFP80:$dst, fpimm0)]>;
@@ -475,13 +478,13 @@ def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9;
 
 // Floating point compares.
-def UCOM_Fpr32 : FpI<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                 []>;  // FPSW = cmp ST(0) with ST(i)
-def UCOM_FpIr32: FpI<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                 [(X86cmp RFP32:$lhs, RFP32:$rhs)]>; // CC = ST(0) cmp ST(i)
-def UCOM_Fpr64 : FpI<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
                 []>;  // FPSW = cmp ST(0) with ST(i)
-def UCOM_FpIr64: FpI<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
                 [(X86cmp RFP64:$lhs, RFP64:$rhs)]>; // CC = ST(0) cmp ST(i)
 def UCOM_Fpr80 : FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
                 []>;  // FPSW = cmp ST(0) with ST(i)
@@ -535,16 +538,16 @@ def : Pat<(X86fst RFP80:$src, addr:$op, f64),
           (ST_Fp80m64 addr:$op, RFP80:$src)>;
 def : Pat<(X86fst RFP80:$src, addr:$op, f80),
           (ST_FpP80m addr:$op, RFP80:$src)>;
 
 // Floating point constant -0.0 and -1.0
-def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStack]>;
-def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStack]>;
-def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStack]>;
-def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStack]>;
+def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
+def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
+def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
+def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
 def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
 def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
 
 // Used to conv. i64 to f64 since there isn't a SSE version.
 def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
 
-def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStack]>;
-def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStack]>;
-def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStack]>;
+def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, Requires<[FPStackf32]>;
+def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, Requires<[FPStackf64]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 4f6e372560ec..dde59fc04be4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -160,7 +160,8 @@ def HasSSE1     : Predicate<"Subtarget->hasSSE1()">;
 def HasSSE2     : Predicate<"Subtarget->hasSSE2()">;
 def HasSSE3     : Predicate<"Subtarget->hasSSE3()">;
 def HasSSSE3    : Predicate<"Subtarget->hasSSSE3()">;
-def FPStack     : Predicate<"!Subtarget->hasSSE2()">;
+def FPStackf32  : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64  : Predicate<"!Subtarget->hasSSE2()">;
 def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
 def In64BitMode : Predicate<"Subtarget->is64Bit()">;
 def HasLow4G    : Predicate<"Subtarget->hasLow4GUserSpaceAddress()">;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7fb7d2efb25c..19ec6adf17df 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -50,7 +50,7 @@ def IMPLICIT_DEF_VR128 : I<0, Pseudo, (outs VR128:$dst), (ins),
                            Requires<[HasSSE1]>;
 def IMPLICIT_DEF_FR32  : I<0, Pseudo, (outs FR32:$dst), (ins),
                            "#IMPLICIT_DEF $dst",
-                           [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
+                           [(set FR32:$dst, (undef))]>, Requires<[HasSSE1]>;
 def IMPLICIT_DEF_FR64  : I<0, Pseudo, (outs FR64:$dst), (ins),
                            "#IMPLICIT_DEF $dst",
                            [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
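Taken together, the X86ISelLowering.cpp changes reduce the old single X86ScalarSSE flag to two per-type flags, which yields three codegen modes. The following standalone C++ sketch models that selection; it is illustrative only, not code from this patch, and the enum and function names are invented:

    // Standalone model of the register-file selection introduced here.
    #include <cstdio>

    enum class FPClass { SSE, X87 };

    struct SubtargetFeatures {
      bool HasSSE1;
      bool HasSSE2; // on real hardware SSE2 implies SSE1
    };

    // Mirrors X86ScalarSSEf32/X86ScalarSSEf64: f32 can live in XMM
    // registers with SSE1, f64 needs SSE2; otherwise the value stays on
    // the x87 stack (f80 always does, regardless of these flags).
    FPClass classifyF32(const SubtargetFeatures &F) {
      return F.HasSSE1 ? FPClass::SSE : FPClass::X87;
    }
    FPClass classifyF64(const SubtargetFeatures &F) {
      return F.HasSSE2 ? FPClass::SSE : FPClass::X87;
    }

    int main() {
      const SubtargetFeatures Modes[] = {
        {false, false}, // -sse -sse2: everything on x87
        {true,  false}, // +sse -sse2: the new mixed mode from PR 1681
        {true,  true},  // +sse +sse2: f32 and f64 both in XMM registers
      };
      for (const auto &M : Modes)
        std::printf("sse1=%d sse2=%d -> f32 in %s, f64 in %s\n",
                    M.HasSSE1, M.HasSSE2,
                    classifyF32(M) == FPClass::SSE ? "XMM" : "x87",
                    classifyF64(M) == FPClass::SSE ? "XMM" : "x87");
    }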
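The TargetLowering.h hunk drops the assumption that every legal immediate arrives as an IEEE double from which a float twin can be derived by rounding; each width is now registered explicitly, which is what lets the mixed mode legalize +0.0f (xorps) without also claiming the f64 constant is legal. A toy model of the new contract, with invented types rather than the real APFloat API:

    // Toy model of the addLegalFPImmediate change: one call records
    // exactly one immediate, so a target wanting both widths calls twice.
    #include <cstdio>
    #include <vector>

    struct FPImm {
      bool IsSingle;   // stand-in for APFloat's float/double semantics
      double Value;
    };

    static std::vector<FPImm> LegalFPImmediates;

    void addLegalFPImmediate(FPImm Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    int main() {
      // Mirrors the pure-x87 block in X86ISelLowering.cpp: +/-0.0 and
      // +/-1.0 are registered in both f64 and f32 forms.
      for (double V : {+0.0, +1.0, -0.0, -1.0}) {
        addLegalFPImmediate({false, V}); // f64 flavor
        addLegalFPImmediate({true, V});  // f32 flavor
      }
      std::printf("%zu legal immediates registered\n",
                  LegalFPImmediates.size());
    }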
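On the TableGen side, the same split appears as the FPStackf32/FPStackf64 predicates: an x87 pseudo for a type is available exactly when SSE cannot handle that type. A C++ rendering of that availability rule follows; this is a simplification for reading the .td hunks, since the real mechanism is TableGen's Requires<[...]> machinery, not runtime checks:

    // Sketch of the predicate split from X86InstrInfo.td; the function
    // names mirror the .td predicates but nothing here is TableGen output.
    #include <cstdio>

    struct Subtarget { bool SSE1, SSE2; };

    // FPStackf32 = !hasSSE1(): x87 f32 pseudos exist only without SSE1.
    // FPStackf64 = !hasSSE2(): x87 f64 pseudos exist only without SSE2.
    bool FPStackf32(const Subtarget &S) { return !S.SSE1; }
    bool FPStackf64(const Subtarget &S) { return !S.SSE2; }

    int main() {
      Subtarget S = {true, false}; // the +sse -sse2 configuration
      std::printf("LD_Fp32m (x87 f32 load) selectable: %s\n",
                  FPStackf32(S) ? "yes" : "no"); // no: f32 is in XMM regs
      std::printf("LD_Fp64m (x87 f64 load) selectable: %s\n",
                  FPStackf64(S) ? "yes" : "no"); // yes: f64 stays on x87
    }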