Allow FP types for atomicrmw xchg

llvm-svn: 351427
Matt Arsenault 2019-01-17 10:49:01 +00:00
parent bd13c9787f
commit 0cb08e448a
24 changed files with 375 additions and 16 deletions

View File

@ -8584,13 +8584,14 @@ operation. The operation must be one of the following keywords:
- umax
- umin

-The type of '<value>' must be an integer type whose bit width is a power
-of two greater than or equal to eight and less than or equal to a
-target-specific size limit. The type of the '``<pointer>``' operand must
-be a pointer to that type. If the ``atomicrmw`` is marked as
-``volatile``, then the optimizer is not allowed to modify the number or
-order of execution of this ``atomicrmw`` with other :ref:`volatile
-operations <volatile>`.
+For most of these operations, the type of '<value>' must be an integer
+type whose bit width is a power of two greater than or equal to eight
+and less than or equal to a target-specific size limit. For xchg, this
+may also be a floating point type with the same size constraints as
+integers. The type of the '``<pointer>``' operand must be a pointer to
+that type. If the ``atomicrmw`` is marked as ``volatile``, then the
+optimizer is not allowed to modify the number or order of execution of
+this ``atomicrmw`` with other :ref:`volatile operations <volatile>`.

A ``atomicrmw`` instruction can also take an optional
":ref:`syncscope <syncscope>`" argument.

View File

@ -6850,12 +6850,20 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
return Error(ValLoc, "atomicrmw value and pointer type do not match");
-if (!Val->getType()->isIntegerTy()) {
+if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) {
return Error(ValLoc, "atomicrmw " +
AtomicRMWInst::getOperationName(Operation) +
" operand must be an integer");
}
if (Operation == AtomicRMWInst::Xchg &&
!Val->getType()->isIntegerTy() &&
!Val->getType()->isFloatingPointTy()) {
return Error(ValLoc, "atomicrmw " +
AtomicRMWInst::getOperationName(Operation) +
" operand must be an integer or floating point type");
}
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
if (Size < 8 || (Size & (Size - 1))) if (Size < 8 || (Size & (Size - 1)))
return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized" return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"

View File

@ -496,11 +496,26 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Value *Loaded, Value *NewVal,
AtomicOrdering MemOpOrder,
Value *&Success, Value *&NewLoaded) {
Type *OrigTy = NewVal->getType();
// This code can go away when cmpxchg supports FP types.
bool NeedBitcast = OrigTy->isFloatingPointTy();
if (NeedBitcast) {
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
unsigned AS = Addr->getType()->getPointerAddressSpace();
Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
NewVal = Builder.CreateBitCast(NewVal, IntTy);
Loaded = Builder.CreateBitCast(Loaded, IntTy);
}
Value* Pair = Builder.CreateAtomicCmpXchg(
Addr, Loaded, NewVal, MemOpOrder,
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
Success = Builder.CreateExtractValue(Pair, 1, "success");
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
if (NeedBitcast)
NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
/// Emit IR to implement the given atomicrmw operation on values in registers,

View File

@ -4532,6 +4532,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(CvtVec);
break;
}
case ISD::ATOMIC_SWAP: {
AtomicSDNode *AM = cast<AtomicSDNode>(Node);
SDLoc SL(Node);
SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
"unexpected promotion type");
assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
"unexpected atomic_swap with illegal type");
SDValue NewAtomic
= DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
DAG.getVTList(NVT, MVT::Other),
{ AM->getChain(), AM->getBasePtr(), CastVal },
AM->getMemOperand());
Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
Results.push_back(NewAtomic.getValue(1));
break;
}
}
// Replace the original node with the legalized result.

View File

@ -104,6 +104,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:
@ -1932,7 +1933,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
}
if (R.getNode())
@ -2166,3 +2167,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
AtomicSDNode *AM = cast<AtomicSDNode>(N);
SDLoc SL(N);
SDValue CastVal = BitConvertToInteger(AM->getVal());
EVT CastVT = CastVal.getValueType();
SDValue NewAtomic
= DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
DAG.getVTList(CastVT, MVT::Other),
{ AM->getChain(), AM->getBasePtr(), CastVal },
AM->getMemOperand());
SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
NewAtomic);
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
return ResultCast;
}

View File

@ -640,6 +640,7 @@ private:
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
SDValue PromoteFloatRes_UNDEF(SDNode *N);
SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);

View File

@ -583,6 +583,14 @@ void TargetLoweringBase::initActions() {
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
for (MVT VT : MVT::fp_valuetypes()) {
MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
if (IntVT.isValid()) {
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
}
}
// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.

View File

@ -3431,10 +3431,17 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
-Assert(ElTy->isIntegerTy(), "atomicrmw " +
-       AtomicRMWInst::getOperationName(Op) +
-       " operand must have integer type!",
-       &RMWI, ElTy);
+if (Op == AtomicRMWInst::Xchg) {
+  Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
+         AtomicRMWInst::getOperationName(Op) +
+         " operand must have integer or floating point type!",
+         &RMWI, ElTy);
+} else {
+  Assert(ElTy->isIntegerTy(), "atomicrmw " +
+         AtomicRMWInst::getOperationName(Op) +
+         " operand must have integer type!",
+         &RMWI, ElTy);
+}
checkAtomicMemAccessSize(ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,

View File

@ -11655,9 +11655,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
-return Builder.CreateTruncOrBitCast(
-    Builder.CreateCall(Ldxr, Addr),
-    cast<PointerType>(Addr->getType())->getElementType());
+Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
+const DataLayout &DL = M->getDataLayout();
+IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
+Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
+return Builder.CreateBitCast(Trunc, EltTy);
}
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
@ -11692,6 +11696,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
return Builder.CreateCall(Stxr,
{Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),

View File

@ -0,0 +1,7 @@
; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
; CHECK: error: atomicrmw xchg operand must be an integer or floating point type
define void @f(i32** %ptr) {
atomicrmw xchg i32** %ptr, i32* null seq_cst
ret void
}

View File

@ -761,6 +761,12 @@ define void @atomics(i32* %word) {
ret void
}
define void @fp_atomics(float* %word) {
; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic
%atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic
ret void
}
;; Fast Math Flags
define void @fastmathflags_unop(float %op1) {
%f.nnan = fneg nnan float %op1

View File

@ -703,6 +703,16 @@ entry:
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) {
entry:
%gep = getelementptr float, float* %out, i32 4
%val = atomicrmw volatile xchg float* %gep, float %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}

View File

@ -650,6 +650,15 @@ entry:
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
entry:
%gep = getelementptr double, double* %out, i64 4
%tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]

View File

@ -839,6 +839,17 @@ entry:
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
entry:
%gep = getelementptr float, float addrspace(1)* %out, i64 4
%val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

View File

@ -783,6 +783,17 @@ entry:
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) {
entry:
%gep = getelementptr double, double addrspace(1)* %out, i64 4
%tmp0 = atomicrmw volatile xchg double addrspace(1)* %gep, double %in seq_cst
ret void
}
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

View File

@ -36,6 +36,20 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; EG: LDS_WRXCHG_RET *
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind {
%gep = getelementptr float, float addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst
store float %result, float addrspace(1)* %out, align 4
ret void
}
; XXX - Is it really necessary to load 4 into VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *

View File

@ -27,6 +27,19 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind {
%gep = getelementptr double, double addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst
store double %result, double addrspace(1)* %out, align 8
ret void
}
; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

View File

@ -360,3 +360,27 @@ define void @atomic_store_relaxed(i128* %p, i128 %in) {
store atomic i128 %in, i128* %p unordered, align 16
ret void
}
@fsc128 = external global fp128
define void @atomic_fetch_swapf128(fp128 %x) nounwind {
; CHECK-LABEL: atomic_fetch_swapf128:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rsi, %rcx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi
; CHECK-NEXT: movq (%rsi), %rax
; CHECK-NEXT: movq 8(%rsi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB14_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lock cmpxchg16b (%rsi)
; CHECK-NEXT: jne LBB14_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
%t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire
ret void
}

View File

@ -2,6 +2,7 @@
; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
@sc16 = external global i16
@fsc16 = external global half
define void @atomic_fetch_add16() nounwind {
; X64-LABEL: atomic_fetch_add16
@ -273,3 +274,14 @@ define void @atomic_fetch_swap16(i16 %x) nounwind {
; X64: ret
; X32: ret
}
define void @atomic_fetch_swapf16(half %x) nounwind {
%t1 = atomicrmw xchg half* @fsc16, half %x acquire
; X64-NOT: lock
; X64: xchgw
; X32-NOT: lock
; X32: xchgw
ret void
; X64: ret
; X32: ret
}

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV
@sc32 = external global i32
@fsc32 = external global float
define void @atomic_fetch_add32() nounwind {
; X64-LABEL: atomic_fetch_add32:
@ -708,3 +709,35 @@ define void @atomic_fetch_swap32(i32 %x) nounwind {
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
ret void
}
define void @atomic_fetch_swapf32(float %x) nounwind {
; X64-LABEL: atomic_fetch_swapf32:
; X64: # %bb.0:
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: xchgl %eax, {{.*}}(%rip)
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq
;
; X86-CMOV-LABEL: atomic_fetch_swapf32:
; X86-CMOV: # %bb.0:
; X86-CMOV-NEXT: pushl %eax
; X86-CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-CMOV-NEXT: movd %xmm0, %eax
; X86-CMOV-NEXT: xchgl %eax, fsc32
; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-CMOV-NEXT: popl %eax
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_swapf32:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: subl $8, %esp
; X86-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; X86-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: xchgl %eax, fsc32
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: addl $8, %esp
; X86-NOCMOV-NEXT: retl
%t1 = atomicrmw xchg float* @fsc32, float %x acquire
ret void
}

View File

@ -1,6 +1,7 @@
; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
@sc64 = external global i64
@fsc64 = external global double
define void @atomic_fetch_add64() nounwind {
; X64-LABEL: atomic_fetch_add64:
@ -233,3 +234,16 @@ define void @atomic_fetch_swap64(i64 %x) nounwind {
; X64: ret
; X32: ret
}
define void @atomic_fetch_swapf64(double %x) nounwind {
; X64-LABEL: atomic_fetch_swapf64:
; X32-LABEL: atomic_fetch_swapf64:
%t1 = atomicrmw xchg double* @fsc64, double %x acquire
; X64-NOT: lock
; X64: xchgq
; X32: lock
; X32: xchg8b
ret void
; X64: ret
; X32: ret
}

View File

@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
; CHECK-LABEL: @atomic_swap_f16(
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f16(half* [[PTR:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[VAL:%.*]] to i16
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f16(i64 [[TMP5]], half* [[PTR]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret void
;
%t1 = atomicrmw xchg half* %ptr, half %val acquire
ret void
}
define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
; CHECK-LABEL: @atomic_swap_f32(
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f32(float* [[PTR:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[VAL:%.*]] to i32
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f32(i64 [[TMP5]], float* [[PTR]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret void
;
%t1 = atomicrmw xchg float* %ptr, float %val acquire
ret void
}
define void @atomic_swap_f64(double* %ptr, double %val) nounwind {
; CHECK-LABEL: @atomic_swap_f64(
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f64(double* [[PTR:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[VAL:%.*]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0f64(i64 [[TMP3]], double* [[PTR]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret void
;
%t1 = atomicrmw xchg double* %ptr, double %val acquire
ret void
}

View File

@ -0,0 +1,3 @@
if not 'AArch64' in config.root.targets:
config.unsupported = True

View File

@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=i686-linux-gnu -atomic-expand %s | FileCheck %s
define double @atomic_xchg_f64(double* %ptr) nounwind {
; CHECK-LABEL: @atomic_xchg_f64(
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret double [[TMP5]]
;
%result = atomicrmw xchg double* %ptr, double 4.0 seq_cst
ret double %result
}
define double @atomic_xchg_f64_as1(double addrspace(1)* %ptr) nounwind {
; CHECK-LABEL: @atomic_xchg_f64_as1(
; CHECK-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
; CHECK-NEXT: ret double [[TMP5]]
;
%result = atomicrmw xchg double addrspace(1)* %ptr, double 4.0 seq_cst
ret double %result
}