[X86] Enable the use of movlps for i64 atomic load on 32-bit targets with sse1.
There is still a little room for improvement: movlps could also be used for the store to the stack temporary that is needed to move the data out of the xmm register after the load.
commit 15b6aa7448
parent 2a10f8019d
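Note (illustrative, not part of the commit): on a 32-bit target with SSE1 but without SSE2, an 8-byte-aligned i64 atomic load like the sketch below was previously selected through the x87 fildll/fistpll sequence; with this change it can be selected as xorps+movlps, with only the stack temporary mentioned above still needed to move the value into integer registers. The function name and the llc flags are assumptions for illustration (e.g. llc -mtriple=i686-- -mattr=+sse,-sse2).

; Hypothetical reduced example, not taken from the patch.
define i64 @load_i64(i64* %p) {
  ; 8-byte-aligned 64-bit atomic load; shouldExpandAtomicLoadInIR can now
  ; leave this intact (AtomicExpansionKind::None) on SSE1-only subtargets.
  %v = load atomic i64, i64* %p seq_cst, align 8
  ret i64 %v
}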
@@ -27544,7 +27544,7 @@ X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
       LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
   if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
       !Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-      (Subtarget.hasSSE2() || Subtarget.hasX87()))
+      (Subtarget.hasSSE1() || Subtarget.hasX87()))
     return AtomicExpansionKind::None;
 
   return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
@@ -29667,15 +29667,27 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
             Attribute::NoImplicitFloat);
     if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
       auto *Node = cast<AtomicSDNode>(N);
-      if (Subtarget.hasSSE2()) {
-        // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the
-        // lower 64-bits.
-        SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+      if (Subtarget.hasSSE1()) {
+        // Use a VZEXT_LOAD which will be selected as MOVQ or XORPS+MOVLPS.
+        // Then extract the lower 64-bits.
+        MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+        SDVTList Tys = DAG.getVTList(LdVT, MVT::Other);
         SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };
         SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
                                              MVT::i64, Node->getMemOperand());
-        SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
-                                  DAG.getIntPtrConstant(0, dl));
+        if (Subtarget.hasSSE2()) {
+          SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld,
+                                    DAG.getIntPtrConstant(0, dl));
+          Results.push_back(Res);
+          Results.push_back(Ld.getValue(1));
+          return;
+        }
+        // We use an alternative sequence for SSE1 that extracts as v2f32 and
+        // then casts to i64. This avoids a 128-bit stack temporary being
+        // created by type legalization if we were to cast v4f32->v2i64.
+        SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld,
+                                  DAG.getIntPtrConstant(0, dl));
+        Res = DAG.getBitcast(MVT::i64, Res);
         Results.push_back(Res);
         Results.push_back(Ld.getValue(1));
         return;
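The test updates below reflect the new lowering: the X86-SSE1 check lines switch from an fildll/fistpll round-trip through memory to an xorps+movlps load followed by two movss spills of the 32-bit halves. A hypothetical reduction of the pattern exercised by the fadd_* tests (the IR and name here are illustrative, not copied from the test file):

define void @fadd_64(double* %loc, double %val) nounwind {
  ; 64-bit atomic load performed as i64, then reinterpreted as a double.
  %iptr = bitcast double* %loc to i64*
  %old.bits = load atomic i64, i64* %iptr seq_cst, align 8
  %old = bitcast i64 %old.bits to double
  %sum = fadd double %old, %val
  ; 64-bit atomic store of the result; the store side already used
  ; xorps+movlps before this patch, as the unchanged check lines show.
  %sum.bits = bitcast double %sum to i64
  store atomic i64 %sum.bits, i64* %iptr seq_cst, align 8
  ret void
}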
@@ -107,18 +107,17 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
 ; X86-SSE1-NEXT:    pushl %ebp
 ; X86-SSE1-NEXT:    movl %esp, %ebp
 ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $24, %esp
+; X86-SSE1-NEXT:    subl $16, %esp
 ; X86-SSE1-NEXT:    movl 8(%ebp), %eax
-; X86-SSE1-NEXT:    fildll (%eax)
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SSE1-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %ecx, (%esp)
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
 ; X86-SSE1-NEXT:    faddl 12(%ebp)
 ; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
 ; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
 ; X86-SSE1-NEXT:    movl %ebp, %esp
@@ -274,17 +273,16 @@ define void @fadd_64g() nounwind {
 ; X86-SSE1-NEXT:    pushl %ebp
 ; X86-SSE1-NEXT:    movl %esp, %ebp
 ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $24, %esp
-; X86-SSE1-NEXT:    fildll glob64
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    subl $16, %esp
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fld1
 ; X86-SSE1-NEXT:    faddl (%esp)
 ; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
 ; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; X86-SSE1-NEXT:    movlps %xmm0, glob64
 ; X86-SSE1-NEXT:    movl %ebp, %esp
@@ -438,17 +436,16 @@ define void @fadd_64imm() nounwind {
 ; X86-SSE1-NEXT:    pushl %ebp
 ; X86-SSE1-NEXT:    movl %esp, %ebp
 ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $24, %esp
-; X86-SSE1-NEXT:    fildll -559038737
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    subl $16, %esp
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fld1
 ; X86-SSE1-NEXT:    faddl (%esp)
 ; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
 ; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; X86-SSE1-NEXT:    movlps %xmm0, -559038737
 ; X86-SSE1-NEXT:    movl %ebp, %esp
@@ -608,17 +605,16 @@ define void @fadd_64stack() nounwind {
 ; X86-SSE1-NEXT:    pushl %ebp
 ; X86-SSE1-NEXT:    movl %esp, %ebp
 ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $32, %esp
-; X86-SSE1-NEXT:    fildll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    subl $24, %esp
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fld1
 ; X86-SSE1-NEXT:    faddl (%esp)
 ; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
 ; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    movl %ebp, %esp
@@ -712,25 +708,22 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
 ; X86-SSE1:       # %bb.0: # %bb
 ; X86-SSE1-NEXT:    pushl %ebp
 ; X86-SSE1-NEXT:    movl %esp, %ebp
-; X86-SSE1-NEXT:    pushl %esi
 ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $32, %esp
+; X86-SSE1-NEXT:    subl $16, %esp
 ; X86-SSE1-NEXT:    movl 20(%ebp), %eax
 ; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
-; X86-SSE1-NEXT:    fildll (%ecx,%eax,8)
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT:    movl %esi, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %edx, (%esp)
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
+; X86-SSE1-NEXT:    movss %xmm1, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
 ; X86-SSE1-NEXT:    faddl 12(%ebp)
 ; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
 ; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
 ; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
-; X86-SSE1-NEXT:    leal -4(%ebp), %esp
-; X86-SSE1-NEXT:    popl %esi
+; X86-SSE1-NEXT:    movl %ebp, %esp
 ; X86-SSE1-NEXT:    popl %ebp
 ; X86-SSE1-NEXT:    retl
 ;
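The remaining hunks cover plain atomic loads of double (load_double and load_double_seq_cst), which follow the same shape. A minimal sketch of that case, again illustrative rather than copied from the test file:

define double @load_double_sketch(double* %fptr) {
  ; The 64-bit atomic load becomes xorps+movlps on SSE1; the value is then
  ; spilled to an 8-byte stack slot with two movss so fldl can reload it.
  %v = load atomic double, double* %fptr seq_cst, align 8
  ret double %v
}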
@@ -272,17 +272,16 @@ define float @load_float(float* %fptr) {
 define double @load_double(double* %fptr) {
 ; X86-SSE1-LABEL: load_double:
 ; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    subl $20, %esp
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 24
+; X86-SSE1-NEXT:    subl $12, %esp
+; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
 ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    fildll (%eax)
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
-; X86-SSE1-NEXT:    addl $20, %esp
+; X86-SSE1-NEXT:    addl $12, %esp
 ; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE1-NEXT:    retl
 ;
@@ -660,17 +659,16 @@ define float @load_float_seq_cst(float* %fptr) {
 define double @load_double_seq_cst(double* %fptr) {
 ; X86-SSE1-LABEL: load_double_seq_cst:
 ; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    subl $20, %esp
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 24
+; X86-SSE1-NEXT:    subl $12, %esp
+; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
 ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    fildll (%eax)
-; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, (%esp)
+; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
+; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
+; X86-SSE1-NEXT:    movss %xmm0, (%esp)
+; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
 ; X86-SSE1-NEXT:    fldl (%esp)
-; X86-SSE1-NEXT:    addl $20, %esp
+; X86-SSE1-NEXT:    addl $12, %esp
 ; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
 ; X86-SSE1-NEXT:    retl
 ;