[X86] Don't use Expand for i32 fp_to_uint on 32-bit SSE1/2 targets.

Use Custom lowering instead. Fall back to the default expansion only
when the scalar FP type belongs in an XMM register. This improves
lowering for fp80 to i32, and also for double to i32 on SSE1-only targets.

llvm-svn: 370699
This commit is contained in:
Craig Topper 2019-09-03 05:57:18 +00:00
parent f255f44336
commit 9dc8c448ed
2 changed files with 63 additions and 139 deletions

View File

@ -287,19 +287,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget.useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
@ -19425,6 +19414,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
if (UseSSEReg && IsSigned)
return Op;
// Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can use
// fisttp.
if (!IsSigned && UseSSEReg && !Subtarget.hasSSE3())
return SDValue();
// Fall back to X87.
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
return V;

View File

@ -257,74 +257,36 @@ define i32 @d_to_u32(double %a) nounwind {
;
; SSE_32_WIN-LABEL: d_to_u32:
; SSE_32_WIN: # %bb.0:
; SSE_32_WIN-NEXT: pushl %ebp
; SSE_32_WIN-NEXT: movl %esp, %ebp
; SSE_32_WIN-NEXT: andl $-8, %esp
; SSE_32_WIN-NEXT: subl $16, %esp
; SSE_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: flds __real@4f000000
; SSE_32_WIN-NEXT: fld %st(1)
; SSE_32_WIN-NEXT: fsub %st(1), %st
; SSE_32_WIN-NEXT: fldl 8(%ebp)
; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fnstcw (%esp)
; SSE_32_WIN-NEXT: movzwl (%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fxch %st(1)
; SSE_32_WIN-NEXT: fistl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw (%esp)
; SSE_32_WIN-NEXT: fxch %st(1)
; SSE_32_WIN-NEXT: fucompi %st(1), %st
; SSE_32_WIN-NEXT: fstp %st(0)
; SSE_32_WIN-NEXT: jbe LBB2_1
; SSE_32_WIN-NEXT: # %bb.2:
; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: addl $16, %esp
; SSE_32_WIN-NEXT: retl
; SSE_32_WIN-NEXT: LBB2_1:
; SSE_32_WIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: addl $16, %esp
; SSE_32_WIN-NEXT: movl %ebp, %esp
; SSE_32_WIN-NEXT: popl %ebp
; SSE_32_WIN-NEXT: retl
;
; SSE_32_LIN-LABEL: d_to_u32:
; SSE_32_LIN: # %bb.0:
; SSE_32_LIN-NEXT: subl $16, %esp
; SSE_32_LIN-NEXT: subl $20, %esp
; SSE_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: flds {{\.LCPI.*}}
; SSE_32_LIN-NEXT: fld %st(1)
; SSE_32_LIN-NEXT: fsub %st(1), %st
; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fnstcw (%esp)
; SSE_32_LIN-NEXT: movzwl (%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fxch %st(1)
; SSE_32_LIN-NEXT: fistl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw (%esp)
; SSE_32_LIN-NEXT: fxch %st(1)
; SSE_32_LIN-NEXT: fucompi %st(1), %st
; SSE_32_LIN-NEXT: fstp %st(0)
; SSE_32_LIN-NEXT: jbe .LBB2_1
; SSE_32_LIN-NEXT: # %bb.2:
; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $16, %esp
; SSE_32_LIN-NEXT: retl
; SSE_32_LIN-NEXT: .LBB2_1:
; SSE_32_LIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $16, %esp
; SSE_32_LIN-NEXT: addl $20, %esp
; SSE_32_LIN-NEXT: retl
;
; X87_WIN-LABEL: d_to_u32:
@ -507,52 +469,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
;
; SSE2_32_WIN-LABEL: x_to_u32:
; SSE2_32_WIN: # %bb.0:
; SSE2_32_WIN-NEXT: subl $8, %esp
; SSE2_32_WIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: flds __real@4f000000
; SSE2_32_WIN-NEXT: fld %st(1)
; SSE2_32_WIN-NEXT: fsub %st(1), %st
; SSE2_32_WIN-NEXT: xorl %eax, %eax
; SSE2_32_WIN-NEXT: fxch %st(1)
; SSE2_32_WIN-NEXT: fucompi %st(2), %st
; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st
; SSE2_32_WIN-NEXT: fstp %st(1)
; SSE2_32_WIN-NEXT: setbe %al
; SSE2_32_WIN-NEXT: fnstcw (%esp)
; SSE2_32_WIN-NEXT: movzwl (%esp), %ecx
; SSE2_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE2_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: pushl %ebp
; SSE2_32_WIN-NEXT: movl %esp, %ebp
; SSE2_32_WIN-NEXT: andl $-8, %esp
; SSE2_32_WIN-NEXT: subl $16, %esp
; SSE2_32_WIN-NEXT: fldt 8(%ebp)
; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE2_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw (%esp)
; SSE2_32_WIN-NEXT: shll $31, %eax
; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE2_32_WIN-NEXT: addl $8, %esp
; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2_32_WIN-NEXT: movl %ebp, %esp
; SSE2_32_WIN-NEXT: popl %ebp
; SSE2_32_WIN-NEXT: retl
;
; SSE2_32_LIN-LABEL: x_to_u32:
; SSE2_32_LIN: # %bb.0:
; SSE2_32_LIN-NEXT: subl $8, %esp
; SSE2_32_LIN-NEXT: subl $20, %esp
; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}}
; SSE2_32_LIN-NEXT: fld %st(1)
; SSE2_32_LIN-NEXT: fsub %st(1), %st
; SSE2_32_LIN-NEXT: xorl %eax, %eax
; SSE2_32_LIN-NEXT: fxch %st(1)
; SSE2_32_LIN-NEXT: fucompi %st(2), %st
; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st
; SSE2_32_LIN-NEXT: fstp %st(1)
; SSE2_32_LIN-NEXT: setbe %al
; SSE2_32_LIN-NEXT: fnstcw (%esp)
; SSE2_32_LIN-NEXT: movzwl (%esp), %ecx
; SSE2_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE2_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE2_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw (%esp)
; SSE2_32_LIN-NEXT: shll $31, %eax
; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE2_32_LIN-NEXT: addl $8, %esp
; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2_32_LIN-NEXT: addl $20, %esp
; SSE2_32_LIN-NEXT: retl
;
; SSE2_64_WIN-LABEL: x_to_u32:
@ -585,52 +531,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
;
; SSE_32_WIN-LABEL: x_to_u32:
; SSE_32_WIN: # %bb.0:
; SSE_32_WIN-NEXT: subl $8, %esp
; SSE_32_WIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: flds __real@4f000000
; SSE_32_WIN-NEXT: fld %st(1)
; SSE_32_WIN-NEXT: fsub %st(1), %st
; SSE_32_WIN-NEXT: xorl %eax, %eax
; SSE_32_WIN-NEXT: fxch %st(1)
; SSE_32_WIN-NEXT: fucompi %st(2), %st
; SSE_32_WIN-NEXT: fcmovnbe %st(1), %st
; SSE_32_WIN-NEXT: fstp %st(1)
; SSE_32_WIN-NEXT: setbe %al
; SSE_32_WIN-NEXT: fnstcw (%esp)
; SSE_32_WIN-NEXT: movzwl (%esp), %ecx
; SSE_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: pushl %ebp
; SSE_32_WIN-NEXT: movl %esp, %ebp
; SSE_32_WIN-NEXT: andl $-8, %esp
; SSE_32_WIN-NEXT: subl $16, %esp
; SSE_32_WIN-NEXT: fldt 8(%ebp)
; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw (%esp)
; SSE_32_WIN-NEXT: shll $31, %eax
; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: addl $8, %esp
; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: movl %ebp, %esp
; SSE_32_WIN-NEXT: popl %ebp
; SSE_32_WIN-NEXT: retl
;
; SSE_32_LIN-LABEL: x_to_u32:
; SSE_32_LIN: # %bb.0:
; SSE_32_LIN-NEXT: subl $8, %esp
; SSE_32_LIN-NEXT: subl $20, %esp
; SSE_32_LIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: flds {{\.LCPI.*}}
; SSE_32_LIN-NEXT: fld %st(1)
; SSE_32_LIN-NEXT: fsub %st(1), %st
; SSE_32_LIN-NEXT: xorl %eax, %eax
; SSE_32_LIN-NEXT: fxch %st(1)
; SSE_32_LIN-NEXT: fucompi %st(2), %st
; SSE_32_LIN-NEXT: fcmovnbe %st(1), %st
; SSE_32_LIN-NEXT: fstp %st(1)
; SSE_32_LIN-NEXT: setbe %al
; SSE_32_LIN-NEXT: fnstcw (%esp)
; SSE_32_LIN-NEXT: movzwl (%esp), %ecx
; SSE_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw (%esp)
; SSE_32_LIN-NEXT: shll $31, %eax
; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $8, %esp
; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $20, %esp
; SSE_32_LIN-NEXT: retl
;
; X87_WIN-LABEL: x_to_u32: