forked from OSchip/llvm-project
[X86][MMX] Match MMX fp_to_sint conversions from XMM registers
We currently perform the various fp_to_sint XMM conversions and then transfer the result to the MMX register (on 32-bit targets via the stack). This patch improves support for MOVDQ2Q XMM to MMX transfers and adds patterns for direct XMM->MMX fp_to_sint conversion. The SSE2 specification gives the XMM->MMX conversions the same rounding/exception behaviour as the XMM->XMM ones, so the substitution does not change results. Differential Revision: https://reviews.llvm.org/D30868 llvm-svn: 298943
This commit is contained in:
parent
2d2b5b09e0
commit
c7c5aa47cf
|
@ -28942,10 +28942,11 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
|||
EVT VT = N->getValueType(0);
|
||||
EVT SrcVT = N0.getValueType();
|
||||
|
||||
// Detect bitcasts between i32 to x86mmx low word. Since MMX types are
|
||||
// special and don't usually play with other vector types, it's better to
|
||||
// handle them early to be sure we emit efficient code by avoiding
|
||||
// store-load conversions.
|
||||
// Since MMX types are special and don't usually play with other vector types,
|
||||
// it's better to handle them early to be sure we emit efficient code by
|
||||
// avoiding store-load conversions.
|
||||
|
||||
// Detect bitcasts between i32 to x86mmx low word.
|
||||
if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) {
|
||||
SDValue N00 = N0->getOperand(0);
|
||||
|
@ -28953,6 +28954,14 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
|
|||
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
|
||||
}
|
||||
|
||||
// Detect bitcasts between v2i64/v2f64 extraction to x86mmx.
|
||||
if (VT == MVT::x86mmx && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
isNullConstant(N0.getOperand(1))) {
|
||||
SDValue N00 = N0->getOperand(0);
|
||||
if (N00.getValueType().is128BitVector())
|
||||
return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, N00);
|
||||
}
|
||||
|
||||
// Convert a bitcasted integer logic operation that has one bitcasted
|
||||
// floating-point operand into a floating-point logic operation. This may
|
||||
// create a load of a constant, but that is cheaper than materializing the
|
||||
|
|
|
@ -647,4 +647,16 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
|
|||
(MMX_MOVQ2FR64rr VR64:$src)>;
|
||||
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
|
||||
(MMX_MOVFR642Qrr FR64:$src)>;
|
||||
def : Pat<(x86mmx (MMX_X86movdq2q
|
||||
(bc_v2i64 (v4i32 (int_x86_sse2_cvtps2dq VR128:$src))))),
|
||||
(MMX_CVTPS2PIirr VR128:$src)>;
|
||||
def : Pat<(x86mmx (MMX_X86movdq2q
|
||||
(bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
|
||||
(MMX_CVTTPS2PIirr VR128:$src)>;
|
||||
def : Pat<(x86mmx (MMX_X86movdq2q
|
||||
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
|
||||
(MMX_CVTPD2PIirr VR128:$src)>;
|
||||
def : Pat<(x86mmx (MMX_X86movdq2q
|
||||
(bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
|
||||
(MMX_CVTTPD2PIirr VR128:$src)>;
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X64
|
||||
|
||||
; FIXME: If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
|
||||
; If we are transferring XMM conversion results to MMX registers we could use the MMX equivalents
|
||||
; (CVTPD2PI/CVTTPD2PI + CVTPS2PI/CVTTPS2PI) without affecting rounding/exceptions etc.
|
||||
|
||||
define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
|
||||
|
@ -11,13 +11,9 @@ define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
|
|||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: cvtpd2dq %xmm0, %xmm0
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
|
||||
; X86-NEXT: cvtpd2pi %xmm0, %mm0
|
||||
; X86-NEXT: paddd %mm0, %mm0
|
||||
; X86-NEXT: movq %mm0, (%esp)
|
||||
; X86-NEXT: movl (%esp), %ecx
|
||||
|
@ -30,8 +26,7 @@ define void @cvt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
|
|||
;
|
||||
; X64-LABEL: cvt_v2f64_v2i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: cvtpd2dq %xmm0, %xmm0
|
||||
; X64-NEXT: movdq2q %xmm0, %mm0
|
||||
; X64-NEXT: cvtpd2pi %xmm0, %mm0
|
||||
; X64-NEXT: paddd %mm0, %mm0
|
||||
; X64-NEXT: movq %mm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
|
@ -52,13 +47,9 @@ define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
|
|||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
|
||||
; X86-NEXT: cvttpd2pi %xmm0, %mm0
|
||||
; X86-NEXT: paddd %mm0, %mm0
|
||||
; X86-NEXT: movq %mm0, (%esp)
|
||||
; X86-NEXT: movl (%esp), %ecx
|
||||
|
@ -71,8 +62,7 @@ define void @cvtt_v2f64_v2i32(<2 x double>, <1 x i64>*) nounwind {
|
|||
;
|
||||
; X64-LABEL: cvtt_v2f64_v2i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: cvttpd2dq %xmm0, %xmm0
|
||||
; X64-NEXT: movdq2q %xmm0, %mm0
|
||||
; X64-NEXT: cvttpd2pi %xmm0, %mm0
|
||||
; X64-NEXT: paddd %mm0, %mm0
|
||||
; X64-NEXT: movq %mm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
|
@ -131,13 +121,9 @@ define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
|
|||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: cvtps2dq %xmm0, %xmm0
|
||||
; X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
|
||||
; X86-NEXT: cvtps2pi %xmm0, %mm0
|
||||
; X86-NEXT: paddd %mm0, %mm0
|
||||
; X86-NEXT: movq %mm0, (%esp)
|
||||
; X86-NEXT: movl (%esp), %ecx
|
||||
|
@ -150,8 +136,7 @@ define void @cvt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
|
|||
;
|
||||
; X64-LABEL: cvt_v2f32_v2i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: cvtps2dq %xmm0, %xmm0
|
||||
; X64-NEXT: movdq2q %xmm0, %mm0
|
||||
; X64-NEXT: cvtps2pi %xmm0, %mm0
|
||||
; X64-NEXT: paddd %mm0, %mm0
|
||||
; X64-NEXT: movq %mm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
|
@ -172,13 +157,9 @@ define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
|
|||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
|
||||
; X86-NEXT: cvttps2pi %xmm0, %mm0
|
||||
; X86-NEXT: paddd %mm0, %mm0
|
||||
; X86-NEXT: movq %mm0, (%esp)
|
||||
; X86-NEXT: movl (%esp), %ecx
|
||||
|
@ -191,8 +172,7 @@ define void @cvtt_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
|
|||
;
|
||||
; X64-LABEL: cvtt_v2f32_v2i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; X64-NEXT: movdq2q %xmm0, %mm0
|
||||
; X64-NEXT: cvttps2pi %xmm0, %mm0
|
||||
; X64-NEXT: paddd %mm0, %mm0
|
||||
; X64-NEXT: movq %mm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
|
@ -213,13 +193,9 @@ define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
|
|||
; X86-NEXT: pushl %ebp
|
||||
; X86-NEXT: movl %esp, %ebp
|
||||
; X86-NEXT: andl $-8, %esp
|
||||
; X86-NEXT: subl $16, %esp
|
||||
; X86-NEXT: subl $8, %esp
|
||||
; X86-NEXT: movl 8(%ebp), %eax
|
||||
; X86-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
|
||||
; X86-NEXT: movd %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
|
||||
; X86-NEXT: cvttps2pi %xmm0, %mm0
|
||||
; X86-NEXT: paddd %mm0, %mm0
|
||||
; X86-NEXT: movq %mm0, (%esp)
|
||||
; X86-NEXT: movl (%esp), %ecx
|
||||
|
@ -232,8 +208,7 @@ define void @fptosi_v2f32_v2i32(<4 x float>, <1 x i64>*) nounwind {
|
|||
;
|
||||
; X64-LABEL: fptosi_v2f32_v2i32:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: cvttps2dq %xmm0, %xmm0
|
||||
; X64-NEXT: movdq2q %xmm0, %mm0
|
||||
; X64-NEXT: cvttps2pi %xmm0, %mm0
|
||||
; X64-NEXT: paddd %mm0, %mm0
|
||||
; X64-NEXT: movq %mm0, (%rdi)
|
||||
; X64-NEXT: retq
|
||||
|
|
Loading…
Reference in New Issue