From be8f217b180e134d568ff491b045d05c137b6234 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 14 Jan 2020 22:40:56 -0800
Subject: [PATCH] [X86] Don't call LowerUINT_TO_FP_i32 for i32->f80 on 32-bit
 targets with sse2.

We were performing an emulated i32->f64 in the SSE registers, then
storing that value to memory and doing an extload into the X87 domain.

After this patch we'll now just store the i32 to memory along with an
i32 0. Then do a 64-bit FILD to f80 completely in the X87 unit. This
matches what we do without SSE.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  2 +-
 llvm/test/CodeGen/X86/scalar-int-to-fp.ll | 58 +++++------------------
 2 files changed, 14 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d7593f525731..0f152968ddfd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19331,7 +19331,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
   if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
     return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
-  if (SrcVT == MVT::i32 && X86ScalarSSEf64)
+  if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
     return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
   if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
     return SDValue();
diff --git a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
index 617710448976..29e8df668b57 100644
--- a/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
+++ b/llvm/test/CodeGen/X86/scalar-int-to-fp.ll
@@ -214,21 +214,19 @@ define double @s32_to_d(i32 %a) nounwind {
 }
 
 define x86_fp80 @u32_to_x(i32 %a) nounwind {
-; AVX512_32-LABEL: u32_to_x:
-; AVX512_32:       # %bb.0:
-; AVX512_32-NEXT:    pushl %ebp
-; AVX512_32-NEXT:    movl %esp, %ebp
-; AVX512_32-NEXT:    andl $-8, %esp
-; AVX512_32-NEXT:    subl $8, %esp
-; AVX512_32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512_32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512_32-NEXT:    vorpd %xmm0, %xmm1, %xmm1
-; AVX512_32-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
-; AVX512_32-NEXT:    vmovsd %xmm0, (%esp)
-; AVX512_32-NEXT:    fldl (%esp)
-; AVX512_32-NEXT:    movl %ebp, %esp
-; AVX512_32-NEXT:    popl %ebp
-; AVX512_32-NEXT:    retl
+; CHECK32-LABEL: u32_to_x:
+; CHECK32:       # %bb.0:
+; CHECK32-NEXT:    pushl %ebp
+; CHECK32-NEXT:    movl %esp, %ebp
+; CHECK32-NEXT:    andl $-8, %esp
+; CHECK32-NEXT:    subl $8, %esp
+; CHECK32-NEXT:    movl 8(%ebp), %eax
+; CHECK32-NEXT:    movl %eax, (%esp)
+; CHECK32-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; CHECK32-NEXT:    fildll (%esp)
+; CHECK32-NEXT:    movl %ebp, %esp
+; CHECK32-NEXT:    popl %ebp
+; CHECK32-NEXT:    retl
 ;
 ; CHECK64-LABEL: u32_to_x:
 ; CHECK64:       # %bb.0:
@@ -236,36 +234,6 @@ define x86_fp80 @u32_to_x(i32 %a) nounwind {
 ; CHECK64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; CHECK64-NEXT:    fildll -{{[0-9]+}}(%rsp)
 ; CHECK64-NEXT:    retq
-;
-; SSE2_32-LABEL: u32_to_x:
-; SSE2_32:       # %bb.0:
-; SSE2_32-NEXT:    pushl %ebp
-; SSE2_32-NEXT:    movl %esp, %ebp
-; SSE2_32-NEXT:    andl $-8, %esp
-; SSE2_32-NEXT:    subl $8, %esp
-; SSE2_32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2_32-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2_32-NEXT:    orpd %xmm0, %xmm1
-; SSE2_32-NEXT:    subsd %xmm0, %xmm1
-; SSE2_32-NEXT:    movsd %xmm1, (%esp)
-; SSE2_32-NEXT:    fldl (%esp)
-; SSE2_32-NEXT:    movl %ebp, %esp
-; SSE2_32-NEXT:    popl %ebp
-; SSE2_32-NEXT:    retl
-;
-; X87-LABEL: u32_to_x:
-; X87:       # %bb.0:
-; X87-NEXT:    pushl %ebp
-; X87-NEXT:    movl %esp, %ebp
-; X87-NEXT:    andl $-8, %esp
-; X87-NEXT:    subl $8, %esp
-; X87-NEXT:    movl 8(%ebp), %eax
-; X87-NEXT:    movl %eax, (%esp)
-; X87-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; X87-NEXT:    fildll (%esp)
-; X87-NEXT:    movl %ebp, %esp
-; X87-NEXT:    popl %ebp
-; X87-NEXT:    retl
   %r = uitofp i32 %a to x86_fp80
   ret x86_fp80 %r
 }
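
For context, a minimal C sketch that exercises this lowering, assuming a
32-bit x86 target where long double is the 80-bit x87 type; the file name
and compiler flags below are illustrative, not part of the patch:

/* u32_to_x.c -- illustrative sketch, e.g. clang -m32 -msse2 -O2 -S u32_to_x.c
 * The cast below becomes "uitofp i32 %a to x86_fp80" in LLVM IR. With this
 * patch it is lowered as a store of the i32 plus an i32 0 high word,
 * followed by a single 64-bit fildll, entirely in the x87 unit. */
long double u32_to_x(unsigned int a) {
  return (long double)a;
}

The zero high word keeps the combined 64-bit value non-negative, so the
signed fildll reproduces the unsigned i32 exactly.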