[X86] Use MOVQ for i64 atomic_stores when SSE2 is enabled
Summary:
If we have SSE2 we can use a MOVQ to store 64 bits and avoid falling back to a cmpxchg8b loop. If it's a seq_cst store we need to insert an mfence after the store.

Reviewers: spatel, RKSimon, reames, jfb, efriedma

Reviewed By: RKSimon

Subscribers: hiraditya, dexonsmith, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60546

llvm-svn: 359368
This commit is contained in:
parent 31cfb311c5
commit 063b471ff7
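For illustration, a minimal sketch (not part of the patch) of the kind of input this change targets: a 64-bit atomic store on a 32-bit SSE2 target, which after this change should lower to a single 8-byte SSE store (movlps/movsd), plus an mfence when the ordering is seq_cst, instead of a lock cmpxchg8b loop. The function names and the llc invocation below are illustrative assumptions, not taken from the patch.

; Illustrative only -- compile with, e.g.: llc -mtriple=i686-- -mattr=+sse2
define void @example_store_release(i64* %p, i64 %v) {
  ; release (or weaker) ordering: expected to become a single movlps/movsd store
  store atomic i64 %v, i64* %p release, align 8
  ret void
}

define void @example_store_seq_cst(i64* %p, i64 %v) {
  ; seq_cst ordering: the same store followed by an mfence
  store atomic i64 %v, i64* %p seq_cst, align 8
  ret void
}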
@@ -25625,8 +25625,18 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
  return false;
}

// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
// TODO: In 32-bit mode, use FISTP when X87 is available?
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  return needsCmpXchgNb(SI->getValueOperand()->getType());
  Type *MemType = SI->getValueOperand()->getType();

  bool NoImplicitFloatOps =
      SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
  if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
    return false;

  return needsCmpXchgNb(MemType);
}

// Note: this turns large loads into lock cmpxchg8b/16b.
@@ -26262,28 +26272,54 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
                     DAG.getUNDEF(VT), LockOp.getValue(1));
}

static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDLoc dl(Node);
  EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
  EVT VT = Node->getMemoryVT();

  bool IsSeqCst = Node->getOrdering() == AtomicOrdering::SequentiallyConsistent;
  bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);

  // If this store is not sequentially consistent and the type is legal
  // we can just keep it.
  if (!IsSeqCst && IsTypeLegal)
    return Op;

  if (VT == MVT::i64 && !IsTypeLegal) {
    // For illegal i64 atomic_stores, we can try to use MOVQ if SSE2 is enabled.
    // FIXME: Use movlps with SSE1.
    // FIXME: Use fist with X87.
    bool NoImplicitFloatOps =
        DAG.getMachineFunction().getFunction().hasFnAttribute(
            Attribute::NoImplicitFloat);
    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
        Subtarget.hasSSE2()) {
      SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                     Node->getOperand(2));
      SDVTList Tys = DAG.getVTList(MVT::Other);
      SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
      SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
                                              Ops, MVT::i64,
                                              Node->getMemOperand());

      // If this is a sequentially consistent store, also emit an mfence.
      if (IsSeqCst)
        Chain = DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Chain);

      return Chain;
    }
  }

  // Convert seq_cst store -> xchg
  // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)
  // FIXME: On 32-bit, store -> fist or movq would be more efficient
  //        (The only way to get a 16-byte store is cmpxchg16b)
  // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.
  if (cast<AtomicSDNode>(Node)->getOrdering() ==
          AtomicOrdering::SequentiallyConsistent ||
      !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
    SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
                                 cast<AtomicSDNode>(Node)->getMemoryVT(),
                                 Node->getOperand(0),
                                 Node->getOperand(1), Node->getOperand(2),
                                 cast<AtomicSDNode>(Node)->getMemOperand());
    return Swap.getValue(1);
  }
  // Other atomic stores have a simple pattern.
  return Op;
  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
                               Node->getMemoryVT(),
                               Node->getOperand(0),
                               Node->getOperand(1), Node->getOperand(2),
                               Node->getMemOperand());
  return Swap.getValue(1);
}

static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
@@ -26704,7 +26740,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
  case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget);
  case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
@@ -27812,6 +27848,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::LAND: return "X86ISD::LAND";
  case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
  case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
  case X86ISD::VEXTRACT_STORE: return "X86ISD::VEXTRACT_STORE";
  case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
  case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
  case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
@@ -590,6 +590,9 @@ namespace llvm {
      // Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // extract_vector_elt, store.
      VEXTRACT_STORE,

      // Store FP control world into i16 memory.
      FNSTCW16m,
@@ -3893,6 +3893,11 @@ def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
@@ -101,6 +101,8 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",

def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                       [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vextractstore : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore,
                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                     SDTCisInt<0>, SDTCisInt<1>,
@@ -4406,12 +4406,18 @@ let Predicates = [UseAVX] in {
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>;

  def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),
            (VMOVPQI2QImr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (MOVQI2PQIrm addr:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;

  def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst),
            (MOVPQI2QImr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
@@ -148,27 +148,15 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movl 8(%ebp), %esi
; X86-SSE2-NEXT: movl 8(%ebp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movl (%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl (%esi), %eax
; X86-SSE2-NEXT: movl 4(%esi), %edx
; X86-SSE2-NEXT: .p2align 4, 0x90
; X86-SSE2-NEXT: .LBB1_1: # %atomicrmw.start
; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT: lock cmpxchg8b (%esi)
; X86-SSE2-NEXT: jne .LBB1_1
; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT: leal -8(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
@@ -176,27 +164,15 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: movl 8(%ebp), %esi
; X86-AVX-NEXT: movl 8(%ebp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: movl (%esp), %ebx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl (%esi), %eax
; X86-AVX-NEXT: movl 4(%esi), %edx
; X86-AVX-NEXT: .p2align 4, 0x90
; X86-AVX-NEXT: .LBB1_1: # %atomicrmw.start
; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT: lock cmpxchg8b (%esi)
; X86-AVX-NEXT: jne .LBB1_1
; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT: leal -8(%ebp), %esp
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
@@ -353,24 +329,14 @@ define void @fadd_64g() nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movl (%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl glob64+4, %edx
; X86-SSE2-NEXT: movl glob64, %eax
; X86-SSE2-NEXT: .p2align 4, 0x90
; X86-SSE2-NEXT: .LBB3_1: # %atomicrmw.start
; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT: lock cmpxchg8b glob64
; X86-SSE2-NEXT: jne .LBB3_1
; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT: leal -4(%ebp), %esp
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, glob64
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
@@ -378,24 +344,14 @@ define void @fadd_64g() nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $16, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: movl (%esp), %ebx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl glob64+4, %edx
; X86-AVX-NEXT: movl glob64, %eax
; X86-AVX-NEXT: .p2align 4, 0x90
; X86-AVX-NEXT: .LBB3_1: # %atomicrmw.start
; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT: lock cmpxchg8b glob64
; X86-AVX-NEXT: jne .LBB3_1
; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT: leal -4(%ebp), %esp
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, glob64
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
@@ -552,24 +508,14 @@ define void @fadd_64imm() nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movl (%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl -559038737, %eax
; X86-SSE2-NEXT: movl -559038733, %edx
; X86-SSE2-NEXT: .p2align 4, 0x90
; X86-SSE2-NEXT: .LBB5_1: # %atomicrmw.start
; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT: lock cmpxchg8b -559038737
; X86-SSE2-NEXT: jne .LBB5_1
; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT: leal -4(%ebp), %esp
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, -559038737
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
@@ -577,24 +523,14 @@ define void @fadd_64imm() nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $16, %esp
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: movl (%esp), %ebx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl -559038737, %eax
; X86-AVX-NEXT: movl -559038733, %edx
; X86-AVX-NEXT: .p2align 4, 0x90
; X86-AVX-NEXT: .LBB5_1: # %atomicrmw.start
; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT: lock cmpxchg8b -559038737
; X86-AVX-NEXT: jne .LBB5_1
; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT: leal -4(%ebp), %esp
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, -559038737
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
@@ -757,24 +693,14 @@ define void @fadd_64stack() nounwind {
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $24, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd {{\.LCPI.*}}, %xmm0
; X86-SSE2-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl (%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: .p2align 4, 0x90
; X86-SSE2-NEXT: .LBB7_1: # %atomicrmw.start
; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT: lock cmpxchg8b (%esp)
; X86-SSE2-NEXT: jne .LBB7_1
; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT: leal -4(%ebp), %esp
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
@@ -782,24 +708,14 @@ define void @fadd_64stack() nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $24, %esp
; X86-AVX-NEXT: subl $16, %esp
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd {{\.LCPI.*}}, %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl (%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: .p2align 4, 0x90
; X86-AVX-NEXT: .LBB7_1: # %atomicrmw.start
; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT: lock cmpxchg8b (%esp)
; X86-AVX-NEXT: jne .LBB7_1
; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT: leal -4(%ebp), %esp
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
@@ -905,30 +821,16 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-SSE2: # %bb.0: # %bb
; X86-SSE2-NEXT: pushl %ebp
; X86-SSE2-NEXT: movl %esp, %ebp
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: andl $-8, %esp
; X86-SSE2-NEXT: subl $16, %esp
; X86-SSE2-NEXT: movl 20(%ebp), %esi
; X86-SSE2-NEXT: movl 8(%ebp), %edi
; X86-SSE2-NEXT: subl $8, %esp
; X86-SSE2-NEXT: movl 20(%ebp), %eax
; X86-SSE2-NEXT: movl 8(%ebp), %ecx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT: movsd %xmm0, (%esp)
; X86-SSE2-NEXT: movl (%esp), %ebx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl (%edi,%esi,8), %eax
; X86-SSE2-NEXT: movl 4(%edi,%esi,8), %edx
; X86-SSE2-NEXT: .p2align 4, 0x90
; X86-SSE2-NEXT: .LBB8_1: # %atomicrmw.start
; X86-SSE2-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE2-NEXT: lock cmpxchg8b (%edi,%esi,8)
; X86-SSE2-NEXT: jne .LBB8_1
; X86-SSE2-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE2-NEXT: leal -12(%ebp), %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: retl
;
@@ -936,30 +838,16 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX: # %bb.0: # %bb
; X86-AVX-NEXT: pushl %ebp
; X86-AVX-NEXT: movl %esp, %ebp
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: andl $-8, %esp
; X86-AVX-NEXT: subl $16, %esp
; X86-AVX-NEXT: movl 20(%ebp), %esi
; X86-AVX-NEXT: movl 8(%ebp), %edi
; X86-AVX-NEXT: subl $8, %esp
; X86-AVX-NEXT: movl 20(%ebp), %eax
; X86-AVX-NEXT: movl 8(%ebp), %ecx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovsd %xmm0, (%esp)
; X86-AVX-NEXT: movl (%esp), %ebx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl (%edi,%esi,8), %eax
; X86-AVX-NEXT: movl 4(%edi,%esi,8), %edx
; X86-AVX-NEXT: .p2align 4, 0x90
; X86-AVX-NEXT: .LBB8_1: # %atomicrmw.start
; X86-AVX-NEXT: # =>This Inner Loop Header: Depth=1
; X86-AVX-NEXT: lock cmpxchg8b (%edi,%esi,8)
; X86-AVX-NEXT: jne .LBB8_1
; X86-AVX-NEXT: # %bb.2: # %atomicrmw.end
; X86-AVX-NEXT: leal -12(%ebp), %esp
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT: movl %ebp, %esp
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
@@ -6,30 +6,38 @@
; FIXME: The generated code can be substantially improved.

define void @test1(i64* %ptr, i64 %val1) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .cfi_offset %esi, -12
; CHECK-NEXT: .cfi_offset %ebx, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl (%esi), %eax
; CHECK-NEXT: movl 4(%esi), %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchg8b (%esi)
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
; SSE42-LABEL: test1:
; SSE42: # %bb.0:
; SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE42-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE42-NEXT: movlps %xmm0, (%eax)
; SSE42-NEXT: mfence
; SSE42-NEXT: retl
;
; NOSSE-LABEL: test1:
; NOSSE: # %bb.0:
; NOSSE-NEXT: pushl %ebx
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: pushl %esi
; NOSSE-NEXT: .cfi_def_cfa_offset 12
; NOSSE-NEXT: .cfi_offset %esi, -12
; NOSSE-NEXT: .cfi_offset %ebx, -8
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; NOSSE-NEXT: movl (%esi), %eax
; NOSSE-NEXT: movl 4(%esi), %edx
; NOSSE-NEXT: .p2align 4, 0x90
; NOSSE-NEXT: .LBB0_1: # %atomicrmw.start
; NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; NOSSE-NEXT: lock cmpxchg8b (%esi)
; NOSSE-NEXT: jne .LBB0_1
; NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; NOSSE-NEXT: popl %esi
; NOSSE-NEXT: .cfi_def_cfa_offset 8
; NOSSE-NEXT: popl %ebx
; NOSSE-NEXT: .cfi_def_cfa_offset 4
; NOSSE-NEXT: retl
store atomic i64 %val1, i64* %ptr seq_cst, align 8
ret void
}
@@ -135,30 +135,69 @@ define void @store_float(float* %fptr, float %v) {
}

define void @store_double(double* %fptr, double %v) {
; X86-LABEL: store_double:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl 4(%esi), %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB2_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: lock cmpxchg8b (%esi)
; X86-NEXT: jne .LBB2_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X86-SSE1-LABEL: store_double:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
; X86-SSE1-NEXT: .cfi_offset %esi, -12
; X86-SSE1-NEXT: .cfi_offset %ebx, -8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movl (%esi), %eax
; X86-SSE1-NEXT: movl 4(%esi), %edx
; X86-SSE1-NEXT: .p2align 4, 0x90
; X86-SSE1-NEXT: .LBB2_1: # %atomicrmw.start
; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
; X86-SSE1-NEXT: jne .LBB2_1
; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE1-NEXT: popl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: popl %ebx
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT: .cfi_offset %esi, -12
; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esi), %eax
; X86-NOSSE-NEXT: movl 4(%esi), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB2_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT: jne .LBB2_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE: # %bb.0:
@@ -641,30 +680,71 @@ define void @store_float_seq_cst(float* %fptr, float %v) {
}

define void @store_double_seq_cst(double* %fptr, double %v) {
; X86-LABEL: store_double_seq_cst:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: .cfi_offset %esi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl 4(%esi), %edx
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: lock cmpxchg8b (%esi)
; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2: # %atomicrmw.end
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1: # %bb.0:
; X86-SSE1-NEXT: pushl %ebx
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: pushl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 12
; X86-SSE1-NEXT: .cfi_offset %esi, -12
; X86-SSE1-NEXT: .cfi_offset %ebx, -8
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT: movl (%esi), %eax
; X86-SSE1-NEXT: movl 4(%esi), %edx
; X86-SSE1-NEXT: .p2align 4, 0x90
; X86-SSE1-NEXT: .LBB9_1: # %atomicrmw.start
; X86-SSE1-NEXT: # =>This Inner Loop Header: Depth=1
; X86-SSE1-NEXT: lock cmpxchg8b (%esi)
; X86-SSE1-NEXT: jne .LBB9_1
; X86-SSE1-NEXT: # %bb.2: # %atomicrmw.end
; X86-SSE1-NEXT: popl %esi
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
; X86-SSE1-NEXT: popl %ebx
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
; X86-SSE1-NEXT: retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
; X86-SSE2-NEXT: mfence
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT: vmovlps %xmm0, (%eax)
; X86-AVX-NEXT: mfence
; X86-AVX-NEXT: retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE: # %bb.0:
; X86-NOSSE-NEXT: pushl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT: .cfi_offset %esi, -12
; X86-NOSSE-NEXT: .cfi_offset %ebx, -8
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT: movl (%esi), %eax
; X86-NOSSE-NEXT: movl 4(%esi), %edx
; X86-NOSSE-NEXT: .p2align 4, 0x90
; X86-NOSSE-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NOSSE-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NOSSE-NEXT: lock cmpxchg8b (%esi)
; X86-NOSSE-NEXT: jne .LBB9_1
; X86-NOSSE-NEXT: # %bb.2: # %atomicrmw.end
; X86-NOSSE-NEXT: popl %esi
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT: retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE: # %bb.0:
@@ -835,34 +835,11 @@ define void @atomic_fetch_cmpxchg64() nounwind {
define void @atomic_fetch_store64(i64 %x) nounwind {
; X32-LABEL: atomic_fetch_store64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
; X32-NEXT: subl $20, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movl sc64+4, %eax
; X32-NEXT: movl sc64, %edx
; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB11_1
; X32-NEXT: .LBB11_1: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: movl (%esp), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB11_1
; X32-NEXT: jmp .LBB11_2
; X32-NEXT: .LBB11_2: # %atomicrmw.end
; X32-NEXT: addl $20, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: movd %ecx, %xmm0
; X32-NEXT: pinsrd $1, %eax, %xmm0
; X32-NEXT: movq %xmm0, sc64
; X32-NEXT: retl
store atomic i64 %x, i64* @sc64 release, align 8
ret void