[X86] Don't use Expand for i32 fp_to_uint on 32-bit SSE1/2 targets.

Use Custom lowering instead. Fall back to default expansion only
when the scalar FP type belongs in an XMM register. This improves
lowering for fp80 to i32, and also double to i32 on SSE1-only targets.

llvm-svn: 370699
This commit is contained in:
Craig Topper 2019-09-03 05:57:18 +00:00
parent f255f44336
commit 9dc8c448ed
2 changed files with 63 additions and 139 deletions

View File

@ -287,18 +287,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
}
} else if (!Subtarget.useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
// With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
}
@ -19425,6 +19414,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
if (UseSSEReg && IsSigned)
return Op;
// Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can use
// fisttp.
if (!IsSigned && UseSSEReg && !Subtarget.hasSSE3())
return SDValue();
// Fall back to X87.
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
return V;

View File

@ -257,74 +257,36 @@ define i32 @d_to_u32(double %a) nounwind {
;
; SSE_32_WIN-LABEL: d_to_u32:
; SSE_32_WIN: # %bb.0:
; SSE_32_WIN-NEXT: pushl %ebp
; SSE_32_WIN-NEXT: movl %esp, %ebp
; SSE_32_WIN-NEXT: andl $-8, %esp
; SSE_32_WIN-NEXT: subl $16, %esp
; SSE_32_WIN-NEXT: fldl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: flds __real@4f000000
; SSE_32_WIN-NEXT: fld %st(1)
; SSE_32_WIN-NEXT: fsub %st(1), %st
; SSE_32_WIN-NEXT: fldl 8(%ebp)
; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fnstcw (%esp)
; SSE_32_WIN-NEXT: movzwl (%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fxch %st(1)
; SSE_32_WIN-NEXT: fistl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw (%esp)
; SSE_32_WIN-NEXT: fxch %st(1)
; SSE_32_WIN-NEXT: fucompi %st(1), %st
; SSE_32_WIN-NEXT: fstp %st(0)
; SSE_32_WIN-NEXT: jbe LBB2_1
; SSE_32_WIN-NEXT: # %bb.2:
; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: addl $16, %esp
; SSE_32_WIN-NEXT: retl
; SSE_32_WIN-NEXT: LBB2_1:
; SSE_32_WIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: addl $16, %esp
; SSE_32_WIN-NEXT: movl %ebp, %esp
; SSE_32_WIN-NEXT: popl %ebp
; SSE_32_WIN-NEXT: retl
;
; SSE_32_LIN-LABEL: d_to_u32:
; SSE_32_LIN: # %bb.0:
; SSE_32_LIN-NEXT: subl $16, %esp
; SSE_32_LIN-NEXT: subl $20, %esp
; SSE_32_LIN-NEXT: fldl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: flds {{\.LCPI.*}}
; SSE_32_LIN-NEXT: fld %st(1)
; SSE_32_LIN-NEXT: fsub %st(1), %st
; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fnstcw (%esp)
; SSE_32_LIN-NEXT: movzwl (%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fxch %st(1)
; SSE_32_LIN-NEXT: fistl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw (%esp)
; SSE_32_LIN-NEXT: fxch %st(1)
; SSE_32_LIN-NEXT: fucompi %st(1), %st
; SSE_32_LIN-NEXT: fstp %st(0)
; SSE_32_LIN-NEXT: jbe .LBB2_1
; SSE_32_LIN-NEXT: # %bb.2:
; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $16, %esp
; SSE_32_LIN-NEXT: retl
; SSE_32_LIN-NEXT: .LBB2_1:
; SSE_32_LIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $16, %esp
; SSE_32_LIN-NEXT: addl $20, %esp
; SSE_32_LIN-NEXT: retl
;
; X87_WIN-LABEL: d_to_u32:
@ -507,52 +469,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
;
; SSE2_32_WIN-LABEL: x_to_u32:
; SSE2_32_WIN: # %bb.0:
; SSE2_32_WIN-NEXT: subl $8, %esp
; SSE2_32_WIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: flds __real@4f000000
; SSE2_32_WIN-NEXT: fld %st(1)
; SSE2_32_WIN-NEXT: fsub %st(1), %st
; SSE2_32_WIN-NEXT: xorl %eax, %eax
; SSE2_32_WIN-NEXT: fxch %st(1)
; SSE2_32_WIN-NEXT: fucompi %st(2), %st
; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st
; SSE2_32_WIN-NEXT: fstp %st(1)
; SSE2_32_WIN-NEXT: setbe %al
; SSE2_32_WIN-NEXT: fnstcw (%esp)
; SSE2_32_WIN-NEXT: movzwl (%esp), %ecx
; SSE2_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE2_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: pushl %ebp
; SSE2_32_WIN-NEXT: movl %esp, %ebp
; SSE2_32_WIN-NEXT: andl $-8, %esp
; SSE2_32_WIN-NEXT: subl $16, %esp
; SSE2_32_WIN-NEXT: fldt 8(%ebp)
; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE2_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw (%esp)
; SSE2_32_WIN-NEXT: shll $31, %eax
; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE2_32_WIN-NEXT: addl $8, %esp
; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2_32_WIN-NEXT: movl %ebp, %esp
; SSE2_32_WIN-NEXT: popl %ebp
; SSE2_32_WIN-NEXT: retl
;
; SSE2_32_LIN-LABEL: x_to_u32:
; SSE2_32_LIN: # %bb.0:
; SSE2_32_LIN-NEXT: subl $8, %esp
; SSE2_32_LIN-NEXT: subl $20, %esp
; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}}
; SSE2_32_LIN-NEXT: fld %st(1)
; SSE2_32_LIN-NEXT: fsub %st(1), %st
; SSE2_32_LIN-NEXT: xorl %eax, %eax
; SSE2_32_LIN-NEXT: fxch %st(1)
; SSE2_32_LIN-NEXT: fucompi %st(2), %st
; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st
; SSE2_32_LIN-NEXT: fstp %st(1)
; SSE2_32_LIN-NEXT: setbe %al
; SSE2_32_LIN-NEXT: fnstcw (%esp)
; SSE2_32_LIN-NEXT: movzwl (%esp), %ecx
; SSE2_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE2_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE2_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw (%esp)
; SSE2_32_LIN-NEXT: shll $31, %eax
; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE2_32_LIN-NEXT: addl $8, %esp
; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2_32_LIN-NEXT: addl $20, %esp
; SSE2_32_LIN-NEXT: retl
;
; SSE2_64_WIN-LABEL: x_to_u32:
@ -585,52 +531,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
;
; SSE_32_WIN-LABEL: x_to_u32:
; SSE_32_WIN: # %bb.0:
; SSE_32_WIN-NEXT: subl $8, %esp
; SSE_32_WIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: flds __real@4f000000
; SSE_32_WIN-NEXT: fld %st(1)
; SSE_32_WIN-NEXT: fsub %st(1), %st
; SSE_32_WIN-NEXT: xorl %eax, %eax
; SSE_32_WIN-NEXT: fxch %st(1)
; SSE_32_WIN-NEXT: fucompi %st(2), %st
; SSE_32_WIN-NEXT: fcmovnbe %st(1), %st
; SSE_32_WIN-NEXT: fstp %st(1)
; SSE_32_WIN-NEXT: setbe %al
; SSE_32_WIN-NEXT: fnstcw (%esp)
; SSE_32_WIN-NEXT: movzwl (%esp), %ecx
; SSE_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: pushl %ebp
; SSE_32_WIN-NEXT: movl %esp, %ebp
; SSE_32_WIN-NEXT: andl $-8, %esp
; SSE_32_WIN-NEXT: subl $16, %esp
; SSE_32_WIN-NEXT: fldt 8(%ebp)
; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw (%esp)
; SSE_32_WIN-NEXT: shll $31, %eax
; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: addl $8, %esp
; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_WIN-NEXT: movl %ebp, %esp
; SSE_32_WIN-NEXT: popl %ebp
; SSE_32_WIN-NEXT: retl
;
; SSE_32_LIN-LABEL: x_to_u32:
; SSE_32_LIN: # %bb.0:
; SSE_32_LIN-NEXT: subl $8, %esp
; SSE_32_LIN-NEXT: subl $20, %esp
; SSE_32_LIN-NEXT: fldt {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: flds {{\.LCPI.*}}
; SSE_32_LIN-NEXT: fld %st(1)
; SSE_32_LIN-NEXT: fsub %st(1), %st
; SSE_32_LIN-NEXT: xorl %eax, %eax
; SSE_32_LIN-NEXT: fxch %st(1)
; SSE_32_LIN-NEXT: fucompi %st(2), %st
; SSE_32_LIN-NEXT: fcmovnbe %st(1), %st
; SSE_32_LIN-NEXT: fstp %st(1)
; SSE_32_LIN-NEXT: setbe %al
; SSE_32_LIN-NEXT: fnstcw (%esp)
; SSE_32_LIN-NEXT: movzwl (%esp), %ecx
; SSE_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00
; SSE_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00
; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw (%esp)
; SSE_32_LIN-NEXT: shll $31, %eax
; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $8, %esp
; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE_32_LIN-NEXT: addl $20, %esp
; SSE_32_LIN-NEXT: retl
;
; X87_WIN-LABEL: x_to_u32: