forked from OSchip/llvm-project
[X86] Don't call LowerUINT_TO_FP_i32 for i32->f80 on 32-bit targets with sse2.
We were performing an emulated i32->f64 conversion in the SSE registers, then storing that value to memory and doing an extload into the X87 domain. After this patch we now just store the i32 to memory along with an i32 0, then do a 64-bit FILD to f80 completely in the X87 unit. This matches what we do without SSE.
This commit is contained in:
parent
1b264a8263
commit
be8f217b18
|
@ -19331,7 +19331,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||||
|
|
||||||
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
|
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
|
||||||
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
|
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
|
||||||
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
|
if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
|
||||||
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
|
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
|
||||||
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
|
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
|
@ -214,21 +214,19 @@ define double @s32_to_d(i32 %a) nounwind {
|
||||||
}
|
}
|
||||||
|
|
||||||
define x86_fp80 @u32_to_x(i32 %a) nounwind {
|
define x86_fp80 @u32_to_x(i32 %a) nounwind {
|
||||||
; AVX512_32-LABEL: u32_to_x:
|
; CHECK32-LABEL: u32_to_x:
|
||||||
; AVX512_32: # %bb.0:
|
; CHECK32: # %bb.0:
|
||||||
; AVX512_32-NEXT: pushl %ebp
|
; CHECK32-NEXT: pushl %ebp
|
||||||
; AVX512_32-NEXT: movl %esp, %ebp
|
; CHECK32-NEXT: movl %esp, %ebp
|
||||||
; AVX512_32-NEXT: andl $-8, %esp
|
; CHECK32-NEXT: andl $-8, %esp
|
||||||
; AVX512_32-NEXT: subl $8, %esp
|
; CHECK32-NEXT: subl $8, %esp
|
||||||
; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; CHECK32-NEXT: movl 8(%ebp), %eax
|
||||||
; AVX512_32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
; CHECK32-NEXT: movl %eax, (%esp)
|
||||||
; AVX512_32-NEXT: vorpd %xmm0, %xmm1, %xmm1
|
; CHECK32-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||||
; AVX512_32-NEXT: vsubsd %xmm0, %xmm1, %xmm0
|
; CHECK32-NEXT: fildll (%esp)
|
||||||
; AVX512_32-NEXT: vmovsd %xmm0, (%esp)
|
; CHECK32-NEXT: movl %ebp, %esp
|
||||||
; AVX512_32-NEXT: fldl (%esp)
|
; CHECK32-NEXT: popl %ebp
|
||||||
; AVX512_32-NEXT: movl %ebp, %esp
|
; CHECK32-NEXT: retl
|
||||||
; AVX512_32-NEXT: popl %ebp
|
|
||||||
; AVX512_32-NEXT: retl
|
|
||||||
;
|
;
|
||||||
; CHECK64-LABEL: u32_to_x:
|
; CHECK64-LABEL: u32_to_x:
|
||||||
; CHECK64: # %bb.0:
|
; CHECK64: # %bb.0:
|
||||||
|
@ -236,36 +234,6 @@ define x86_fp80 @u32_to_x(i32 %a) nounwind {
|
||||||
; CHECK64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
; CHECK64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||||
; CHECK64-NEXT: fildll -{{[0-9]+}}(%rsp)
|
; CHECK64-NEXT: fildll -{{[0-9]+}}(%rsp)
|
||||||
; CHECK64-NEXT: retq
|
; CHECK64-NEXT: retq
|
||||||
;
|
|
||||||
; SSE2_32-LABEL: u32_to_x:
|
|
||||||
; SSE2_32: # %bb.0:
|
|
||||||
; SSE2_32-NEXT: pushl %ebp
|
|
||||||
; SSE2_32-NEXT: movl %esp, %ebp
|
|
||||||
; SSE2_32-NEXT: andl $-8, %esp
|
|
||||||
; SSE2_32-NEXT: subl $8, %esp
|
|
||||||
; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
|
|
||||||
; SSE2_32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
|
||||||
; SSE2_32-NEXT: orpd %xmm0, %xmm1
|
|
||||||
; SSE2_32-NEXT: subsd %xmm0, %xmm1
|
|
||||||
; SSE2_32-NEXT: movsd %xmm1, (%esp)
|
|
||||||
; SSE2_32-NEXT: fldl (%esp)
|
|
||||||
; SSE2_32-NEXT: movl %ebp, %esp
|
|
||||||
; SSE2_32-NEXT: popl %ebp
|
|
||||||
; SSE2_32-NEXT: retl
|
|
||||||
;
|
|
||||||
; X87-LABEL: u32_to_x:
|
|
||||||
; X87: # %bb.0:
|
|
||||||
; X87-NEXT: pushl %ebp
|
|
||||||
; X87-NEXT: movl %esp, %ebp
|
|
||||||
; X87-NEXT: andl $-8, %esp
|
|
||||||
; X87-NEXT: subl $8, %esp
|
|
||||||
; X87-NEXT: movl 8(%ebp), %eax
|
|
||||||
; X87-NEXT: movl %eax, (%esp)
|
|
||||||
; X87-NEXT: movl $0, {{[0-9]+}}(%esp)
|
|
||||||
; X87-NEXT: fildll (%esp)
|
|
||||||
; X87-NEXT: movl %ebp, %esp
|
|
||||||
; X87-NEXT: popl %ebp
|
|
||||||
; X87-NEXT: retl
|
|
||||||
%r = uitofp i32 %a to x86_fp80
|
%r = uitofp i32 %a to x86_fp80
|
||||||
ret x86_fp80 %r
|
ret x86_fp80 %r
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue