[X86] Fix fptoui conversions
This fixes two issues in x86 fptoui lowering:

1) Makes conversions from f80 go through the right path on AVX-512.
2) Implements an inline sequence for fptoui i64 instead of a library call.

This improves performance by 6X on SSE3+ and 3X otherwise. Incidentally, it also removes the use of ftol2 for fptoui, which was wrong to begin with, as ftol2 converts to a signed i64, producing wrong results for values >= 2^63.

Patch by: mitch.l.bodart@intel.com
Differential Revision: http://reviews.llvm.org/D11316

llvm-svn: 245924
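As context for the diff below, here is a minimal, self-contained C++ sketch of the scalar arithmetic behind the new inline fptoui i64 sequence (compare against 2^63, conditionally subtract, do a signed conversion, then restore the 2^63 bit). It is an illustration only, not the SelectionDAG code from this patch, and the helper name fptoui64_via_signed is invented for the example.

#include <cstdint>
#include <cstdio>

// Illustration only: scalar equivalent of the threshold-based lowering.
// Values below 2^63 convert directly with a signed conversion; larger values
// are shifted into signed range by subtracting 2^63, and the bit is restored
// on the integer side (the DAG code XORs the high 32 bits with 0x80000000,
// which is equivalent to adding 2^63 here).
static uint64_t fptoui64_via_signed(double X) {
  const double Thresh = 9223372036854775808.0; // 2^63, exact in every FP format
  if (X < Thresh)
    return (uint64_t)(int64_t)X;                     // already in signed range
  int64_t Adjusted = (int64_t)(X - Thresh);          // now fits in a signed i64
  return (uint64_t)Adjusted + 0x8000000000000000ULL; // add back 2^63
}

int main() {
  double Big = 18446744073709000000.0; // well above 2^63
  // A bare signed conversion (which is all _ftol2 provides) cannot represent
  // this value, while the two-step sequence recovers the intended result.
  std::printf("%llu\n", (unsigned long long)fptoui64_via_signed(Big));
  return 0;
}

The new test file added near the end of the diff checks for exactly these ingredients: an FP subtract plus a truncating conversion, or vcvtts[ds]2usi directly on AVX-512.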
@@ -114,13 +114,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);

// The _ftol2 runtime function has an unusual calling conv, which
// is modeled by a special pseudo-instruction.
setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}

if (Subtarget->isTargetDarwin()) {
@@ -228,8 +221,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
// FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
} else {
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget->useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
@@ -238,14 +237,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}

if (isTargetFTOL()) {
// Use the _ftol2 runtime function, which has a pseudo-instruction
// to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
@@ -1330,13 +1326,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
// FIXME: [US]INT_TO_FP are not legal for f80.
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
}
@@ -12334,15 +12327,45 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
DAG.getIntPtrConstant(0, dl));
}

// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
// is legal, or has an f16 source (which needs to be promoted to f32),
// just return an <SDValue(), SDValue()> pair.
// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
// to i16, i32 or i64, and we lower it to a legal sequence.
// If lowered to the final integer result we return a <result, SDValue()> pair.
// Otherwise we lower it to a sequence ending with a FIST, return a
// <FIST, StackSlot> pair, and the caller is responsible for loading
// the final integer result from StackSlot.
std::pair<SDValue,SDValue>
X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);

EVT DstTy = Op.getValueType();
EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());

if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
if (TheVT == MVT::f16)
// We need to promote the f16 to f32 before using the lowering
// in this routine.
return std::make_pair(SDValue(), SDValue());

assert((TheVT == MVT::f32 ||
TheVT == MVT::f64 ||
TheVT == MVT::f80) &&
"Unexpected FP operand type in FP_TO_INTHelper");

// If using FIST to compute an unsigned i64, we'll need some fixup
// to handle values above the maximum signed i64. A FIST is always
// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
bool UnsignedFixup = !IsSigned &&
DstTy == MVT::i64 &&
(!Subtarget->is64Bit() ||
!isScalarFPTypeInSSEReg(TheVT));

if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
@@ -12360,27 +12383,72 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());

// We lower FP->int64 either into FISTP64 followed by a load from a temporary
// stack slot, or into the FTOL runtime function.
// We lower FP->int64 into FISTP64 followed by a load from a temporary
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);

unsigned Opc;
if (!IsSigned && isIntegerTypeFTOL(DstTy))
Opc = X86ISD::WIN_FTOL;
else
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}

SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();
SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.

if (UnsignedFixup) {
//
// Conversion to unsigned i64 is implemented with a select,
// depending on whether the source value fits in the range
// of a signed i64. Let Thresh be the FP equivalent of
// 0x8000000000000000ULL.
//
// Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
// FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
// Fist-to-mem64 FistSrc
// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
// to XOR'ing the high 32 bits with Adjust.
//
// Being a power of 2, Thresh is exactly representable in all FP formats.
// For X87 we'd like to use the smallest FP type for this constant, but
// for DAG type consistency we have to match the FP operand type.

APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000));
APFloat::opStatus Status = APFloat::opOK;
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&LosesInfo);
else if (TheVT == MVT::f80)
Status = Thresh.convert(APFloat::x87DoubleExtended,
APFloat::rmNearestTiesToEven, &LosesInfo);

assert(Status == APFloat::opOK && !LosesInfo &&
"FP conversion should have been exact");

SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);

SDValue Cmp = DAG.getSetCC(DL,
getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0x80000000, DL, MVT::i32));
SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TheVT),
Value, ThreshVal, ISD::SETLT);
Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
}

// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
@@ -12406,25 +12474,49 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);

if (Opc != X86ISD::WIN_FTOL) {
if (UnsignedFixup) {

// Insert the FIST, load its result as two i32's,
// and XOR the high i32 with Adjust.

SDValue FistOps[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
FistOps, DstTy, MMO);

SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot,
MachinePointerInfo(),
false, false, false, 0);
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot,
DAG.getConstant(4, DL, PtrVT));

SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr,
MachinePointerInfo(),
false, false, false, 0);
High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);

if (Subtarget->is64Bit()) {
// Join High32 and Low32 into a 64-bit result.
// (High32 << 32) | Low32
Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
DAG.getConstant(32, DL, MVT::i8));
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
return std::make_pair(Result, SDValue());
}

SDValue ResultOps[] = { Low32, High32 };

SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
: DAG.getMergeValues(ResultOps, DL);
return std::make_pair(pair, SDValue());
} else {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
} else {
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Value);
SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
MVT::i32, ftol.getValue(1));
SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
MVT::i32, eax.getValue(2));
SDValue Ops[] = { eax, edx };
SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
: DAG.getMergeValues(Ops, DL);
return std::make_pair(pair, SDValue());
}
}
@@ -12688,7 +12780,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (!FIST.getNode()) return Op;
if (!FIST.getNode())
return Op;

if (StackSlot.getNode())
// Load the result.
@@ -12705,7 +12798,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
assert(FIST.getNode() && "Unexpected failure");
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (!FIST.getNode())
return Op;

if (StackSlot.getNode())
// Load the result.
@@ -18885,17 +18980,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
// FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
// (FP_TO_SINT (load f16)) to FP_TO_INT*.
if (N->getOperand(0).getValueType() == MVT::f16)
break;
// fallthrough
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;

if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
return;

std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -26507,10 +26594,6 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}

bool X86TargetLowering::isTargetFTOL() const {
return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
}

bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
@@ -856,15 +856,6 @@ namespace llvm {
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}

/// Return true if the target uses the MSVC _ftol2 routine for fptoui.
bool isTargetFTOL() const;

/// Return true if the MSVC _ftol2 routine should be used for fptoui to the
/// given type.
bool isIntegerTypeFTOL(EVT VT) const {
return isTargetFTOL() && VT == MVT::i64;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -30,5 +30,5 @@ define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {

; CHECK: foo
; CHECK-NOT: vzeroupper
; CHECK: _ftol2
; CHECK: {{cvtt|fist}}
; CHECK: ret
@@ -0,0 +1,135 @@
; Check that scalar FP conversions to signed and unsigned int64 are using
; reasonable sequences, across platforms and target switches.
;
; The signed case is straight forward, and the tests here basically
; ensure successful compilation (f80 with avx512 was broken at one point).
;
; For the unsigned case there are many possible sequences, so to avoid
; a fragile test we just check for the presence of a few key instructions.
; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double.
; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd),
; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist). When
; both a subtract and fnstcw are needed, they can occur in either order.
;
; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp),
; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only).
;
; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87

; CHECK-LABEL: f_to_u64
; X87-DAG: fsub
; X87-DAG: fnstcw
; X87: fist
; SSE2_32-DAG: {{subss|fsub}}
; SSE2_32-DAG: fnstcw
; SSE2_32: fist
; SSE2_64: subss
; SSE2_64: cvttss2si
; SSE3_32: {{subss|fsub}}
; SSE3_32: fistt
; SSE3_64: subss
; SSE3_64: cvttss2si
; AVX512_32: {{subss|fsub}}
; AVX512_32: fistt
; AVX512_64: vcvttss2usi
; CHECK: ret
define i64 @f_to_u64(float %a) nounwind {
%r = fptoui float %a to i64
ret i64 %r
}

; CHECK-LABEL: f_to_s64
; X87: fnstcw
; X87: fist
; SSE2_32: fnstcw
; SSE2_32: fist
; SSE2_64: cvttss2si
; SSE3_32: fistt
; SSE3_64: cvttss2si
; AVX512_32: fistt
; AVX512_64: vcvttss2si
; CHECK: ret
define i64 @f_to_s64(float %a) nounwind {
%r = fptosi float %a to i64
ret i64 %r
}

; CHECK-LABEL: d_to_u64
; X87-DAG: fsub
; X87-DAG: fnstcw
; X87: fist
; SSE2_32-DAG: {{subsd|fsub}}
; SSE2_32-DAG: fnstcw
; SSE2_32: fist
; SSE2_64: subsd
; SSE2_64: cvttsd2si
; SSE3_32: {{subsd|fsub}}
; SSE3_32: fistt
; SSE3_64: subsd
; SSE3_64: cvttsd2si
; AVX512_32: {{subsd|fsub}}
; AVX512_32: fistt
; AVX512_64: vcvttsd2usi
; CHECK: ret
define i64 @d_to_u64(double %a) nounwind {
%r = fptoui double %a to i64
ret i64 %r
}

; CHECK-LABEL: d_to_s64
; X87: fnstcw
; X87: fist
; SSE2_32: fnstcw
; SSE2_32: fist
; SSE2_64: cvttsd2si
; SSE3_32: fistt
; SSE3_64: cvttsd2si
; AVX512_32: fistt
; AVX512_64: vcvttsd2si
; CHECK: ret
define i64 @d_to_s64(double %a) nounwind {
%r = fptosi double %a to i64
ret i64 %r
}

; CHECK-LABEL: x_to_u64
; CHECK-DAG: fsub
; X87-DAG: fnstcw
; SSE2_32-DAG: fnstcw
; SSE2_64-DAG: fnstcw
; CHECK: fist
; CHECK: ret
define i64 @x_to_u64(x86_fp80 %a) nounwind {
%r = fptoui x86_fp80 %a to i64
ret i64 %r
}

; CHECK-LABEL: x_to_s64
; X87: fnstcw
; X87: fist
; SSE2_32: fnstcw
; SSE2_32: fist
; SSE2_64: fnstcw
; SSE2_64: fist
; SSE3_32: fistt
; SSE3_64: fistt
; AVX512_32: fistt
; AVX512_64: fistt
; CHECK: ret
define i64 @x_to_s64(x86_fp80 %a) nounwind {
%r = fptosi x86_fp80 %a to i64
ret i64 %r
}
@@ -1,15 +1,17 @@
; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL
; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=COMPILERRT

; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
; function has a nonstandard calling convention: the input value is expected on
; the x87 stack instead of the callstack. The input value is popped by the
; callee. Mingw32 uses normal cdecl compiler-rt functions.
; This test originally used the FTOL and FTOL_2 checks for the i686-pc-win32
; triples, under the assumption that Win32 targets should use the MSVCRT
; _ftol2 runtime function for fptoui to i64. That usage was incorrect,
; as _ftol2 performs conversion to signed i64. As of the compiler fix,
; the FTOL/FTOL_2 checks are no longer used, which basically renders
; this test meaningless.

define i64 @double_ui64(double %x) nounwind {
entry: