forked from OSchip/llvm-project
parent
bd13c9787f
commit
0cb08e448a
|
@ -8584,13 +8584,14 @@ operation. The operation must be one of the following keywords:
|
||||||
- umax
|
- umax
|
||||||
- umin
|
- umin
|
||||||
|
|
||||||
The type of '<value>' must be an integer type whose bit width is a power
|
For most of these operations, the type of '<value>' must be an integer
|
||||||
of two greater than or equal to eight and less than or equal to a
|
type whose bit width is a power of two greater than or equal to eight
|
||||||
target-specific size limit. The type of the '``<pointer>``' operand must
|
and less than or equal to a target-specific size limit. For xchg, this
|
||||||
be a pointer to that type. If the ``atomicrmw`` is marked as
|
may also be a floating point type with the same size constraints as
|
||||||
``volatile``, then the optimizer is not allowed to modify the number or
|
integers. The type of the '``<pointer>``' operand must be a pointer to
|
||||||
order of execution of this ``atomicrmw`` with other :ref:`volatile
|
that type. If the ``atomicrmw`` is marked as ``volatile``, then the
|
||||||
operations <volatile>`.
|
optimizer is not allowed to modify the number or order of execution of
|
||||||
|
this ``atomicrmw`` with other :ref:`volatile operations <volatile>`.
|
||||||
|
|
||||||
A ``atomicrmw`` instruction can also take an optional
|
A ``atomicrmw`` instruction can also take an optional
|
||||||
":ref:`syncscope <syncscope>`" argument.
|
":ref:`syncscope <syncscope>`" argument.
|
||||||
|
|
|
@ -6850,12 +6850,20 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
|
||||||
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
|
if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
|
||||||
return Error(ValLoc, "atomicrmw value and pointer type do not match");
|
return Error(ValLoc, "atomicrmw value and pointer type do not match");
|
||||||
|
|
||||||
if (!Val->getType()->isIntegerTy()) {
|
if (Operation != AtomicRMWInst::Xchg && !Val->getType()->isIntegerTy()) {
|
||||||
return Error(ValLoc, "atomicrmw " +
|
return Error(ValLoc, "atomicrmw " +
|
||||||
AtomicRMWInst::getOperationName(Operation) +
|
AtomicRMWInst::getOperationName(Operation) +
|
||||||
" operand must be an integer");
|
" operand must be an integer");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Operation == AtomicRMWInst::Xchg &&
|
||||||
|
!Val->getType()->isIntegerTy() &&
|
||||||
|
!Val->getType()->isFloatingPointTy()) {
|
||||||
|
return Error(ValLoc, "atomicrmw " +
|
||||||
|
AtomicRMWInst::getOperationName(Operation) +
|
||||||
|
" operand must be an integer or floating point type");
|
||||||
|
}
|
||||||
|
|
||||||
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
|
unsigned Size = Val->getType()->getPrimitiveSizeInBits();
|
||||||
if (Size < 8 || (Size & (Size - 1)))
|
if (Size < 8 || (Size & (Size - 1)))
|
||||||
return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
|
return Error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
|
||||||
|
|
|
@ -496,11 +496,26 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
|
||||||
Value *Loaded, Value *NewVal,
|
Value *Loaded, Value *NewVal,
|
||||||
AtomicOrdering MemOpOrder,
|
AtomicOrdering MemOpOrder,
|
||||||
Value *&Success, Value *&NewLoaded) {
|
Value *&Success, Value *&NewLoaded) {
|
||||||
|
Type *OrigTy = NewVal->getType();
|
||||||
|
|
||||||
|
// This code can go away when cmpxchg supports FP types.
|
||||||
|
bool NeedBitcast = OrigTy->isFloatingPointTy();
|
||||||
|
if (NeedBitcast) {
|
||||||
|
IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
|
||||||
|
unsigned AS = Addr->getType()->getPointerAddressSpace();
|
||||||
|
Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
|
||||||
|
NewVal = Builder.CreateBitCast(NewVal, IntTy);
|
||||||
|
Loaded = Builder.CreateBitCast(Loaded, IntTy);
|
||||||
|
}
|
||||||
|
|
||||||
Value* Pair = Builder.CreateAtomicCmpXchg(
|
Value* Pair = Builder.CreateAtomicCmpXchg(
|
||||||
Addr, Loaded, NewVal, MemOpOrder,
|
Addr, Loaded, NewVal, MemOpOrder,
|
||||||
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
|
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
|
||||||
Success = Builder.CreateExtractValue(Pair, 1, "success");
|
Success = Builder.CreateExtractValue(Pair, 1, "success");
|
||||||
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
|
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
|
||||||
|
|
||||||
|
if (NeedBitcast)
|
||||||
|
NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emit IR to implement the given atomicrmw operation on values in registers,
|
/// Emit IR to implement the given atomicrmw operation on values in registers,
|
||||||
|
|
|
@ -4532,6 +4532,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
||||||
Results.push_back(CvtVec);
|
Results.push_back(CvtVec);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case ISD::ATOMIC_SWAP: {
|
||||||
|
AtomicSDNode *AM = cast<AtomicSDNode>(Node);
|
||||||
|
SDLoc SL(Node);
|
||||||
|
SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
|
||||||
|
assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
|
||||||
|
"unexpected promotion type");
|
||||||
|
assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
|
||||||
|
"unexpected atomic_swap with illegal type");
|
||||||
|
|
||||||
|
SDValue NewAtomic
|
||||||
|
= DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
|
||||||
|
DAG.getVTList(NVT, MVT::Other),
|
||||||
|
{ AM->getChain(), AM->getBasePtr(), CastVal },
|
||||||
|
AM->getMemOperand());
|
||||||
|
Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
|
||||||
|
Results.push_back(NewAtomic.getValue(1));
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace the original node with the legalized result.
|
// Replace the original node with the legalized result.
|
||||||
|
|
|
@ -104,6 +104,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
|
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
|
||||||
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
|
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
|
||||||
case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
|
case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
|
||||||
|
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
|
||||||
case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
|
case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
|
||||||
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
|
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
|
||||||
case ISD::SINT_TO_FP:
|
case ISD::SINT_TO_FP:
|
||||||
|
@ -1932,7 +1933,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
|
||||||
case ISD::SINT_TO_FP:
|
case ISD::SINT_TO_FP:
|
||||||
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
|
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
|
||||||
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
|
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
|
||||||
|
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (R.getNode())
|
if (R.getNode())
|
||||||
|
@ -2166,3 +2167,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
|
||||||
N->getValueType(0)));
|
N->getValueType(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
|
||||||
|
EVT VT = N->getValueType(0);
|
||||||
|
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
|
||||||
|
|
||||||
|
AtomicSDNode *AM = cast<AtomicSDNode>(N);
|
||||||
|
SDLoc SL(N);
|
||||||
|
|
||||||
|
SDValue CastVal = BitConvertToInteger(AM->getVal());
|
||||||
|
EVT CastVT = CastVal.getValueType();
|
||||||
|
|
||||||
|
SDValue NewAtomic
|
||||||
|
= DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
|
||||||
|
DAG.getVTList(CastVT, MVT::Other),
|
||||||
|
{ AM->getChain(), AM->getBasePtr(), CastVal },
|
||||||
|
AM->getMemOperand());
|
||||||
|
|
||||||
|
SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
|
||||||
|
NewAtomic);
|
||||||
|
// Legalize the chain result by replacing uses of the old value chain with the
|
||||||
|
// new one
|
||||||
|
ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
|
||||||
|
|
||||||
|
return ResultCast;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -640,6 +640,7 @@ private:
|
||||||
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
|
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
|
||||||
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
|
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
|
||||||
SDValue PromoteFloatRes_UNDEF(SDNode *N);
|
SDValue PromoteFloatRes_UNDEF(SDNode *N);
|
||||||
|
SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
|
||||||
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
|
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
|
||||||
|
|
||||||
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
|
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
|
||||||
|
|
|
@ -583,6 +583,14 @@ void TargetLoweringBase::initActions() {
|
||||||
std::fill(std::begin(TargetDAGCombineArray),
|
std::fill(std::begin(TargetDAGCombineArray),
|
||||||
std::end(TargetDAGCombineArray), 0);
|
std::end(TargetDAGCombineArray), 0);
|
||||||
|
|
||||||
|
for (MVT VT : MVT::fp_valuetypes()) {
|
||||||
|
MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
|
||||||
|
if (IntVT.isValid()) {
|
||||||
|
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
|
||||||
|
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Set default actions for various operations.
|
// Set default actions for various operations.
|
||||||
for (MVT VT : MVT::all_valuetypes()) {
|
for (MVT VT : MVT::all_valuetypes()) {
|
||||||
// Default all indexed load / store to expand.
|
// Default all indexed load / store to expand.
|
||||||
|
|
|
@ -3431,10 +3431,17 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
|
||||||
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
|
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
|
||||||
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
|
Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
|
||||||
Type *ElTy = PTy->getElementType();
|
Type *ElTy = PTy->getElementType();
|
||||||
Assert(ElTy->isIntegerTy(), "atomicrmw " +
|
if (Op == AtomicRMWInst::Xchg) {
|
||||||
AtomicRMWInst::getOperationName(Op) +
|
Assert(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(), "atomicrmw " +
|
||||||
" operand must have integer type!",
|
AtomicRMWInst::getOperationName(Op) +
|
||||||
&RMWI, ElTy);
|
" operand must have integer or floating point type!",
|
||||||
|
&RMWI, ElTy);
|
||||||
|
} else {
|
||||||
|
Assert(ElTy->isIntegerTy(), "atomicrmw " +
|
||||||
|
AtomicRMWInst::getOperationName(Op) +
|
||||||
|
" operand must have integer type!",
|
||||||
|
&RMWI, ElTy);
|
||||||
|
}
|
||||||
checkAtomicMemAccessSize(ElTy, &RMWI);
|
checkAtomicMemAccessSize(ElTy, &RMWI);
|
||||||
Assert(ElTy == RMWI.getOperand(1)->getType(),
|
Assert(ElTy == RMWI.getOperand(1)->getType(),
|
||||||
"Argument value type does not match pointer operand type!", &RMWI,
|
"Argument value type does not match pointer operand type!", &RMWI,
|
||||||
|
|
|
@ -11655,9 +11655,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
|
||||||
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
|
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
|
||||||
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
|
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
|
||||||
|
|
||||||
return Builder.CreateTruncOrBitCast(
|
Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
|
||||||
Builder.CreateCall(Ldxr, Addr),
|
|
||||||
cast<PointerType>(Addr->getType())->getElementType());
|
const DataLayout &DL = M->getDataLayout();
|
||||||
|
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
|
||||||
|
Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
|
||||||
|
|
||||||
|
return Builder.CreateBitCast(Trunc, EltTy);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
|
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
|
||||||
|
@ -11692,6 +11696,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
|
||||||
Type *Tys[] = { Addr->getType() };
|
Type *Tys[] = { Addr->getType() };
|
||||||
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
|
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
|
||||||
|
|
||||||
|
const DataLayout &DL = M->getDataLayout();
|
||||||
|
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
|
||||||
|
Val = Builder.CreateBitCast(Val, IntValTy);
|
||||||
|
|
||||||
return Builder.CreateCall(Stxr,
|
return Builder.CreateCall(Stxr,
|
||||||
{Builder.CreateZExtOrBitCast(
|
{Builder.CreateZExtOrBitCast(
|
||||||
Val, Stxr->getFunctionType()->getParamType(0)),
|
Val, Stxr->getFunctionType()->getParamType(0)),
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK: error: atomicrmw xchg operand must be an integer or floating point type
|
||||||
|
define void @f(i32** %ptr) {
|
||||||
|
atomicrmw xchg i32** %ptr, i32* null seq_cst
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -761,6 +761,12 @@ define void @atomics(i32* %word) {
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @fp_atomics(float* %word) {
|
||||||
|
; CHECK: %atomicrmw.xchg = atomicrmw xchg float* %word, float 1.000000e+00 monotonic
|
||||||
|
%atomicrmw.xchg = atomicrmw xchg float* %word, float 1.0 monotonic
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
;; Fast Math Flags
|
;; Fast Math Flags
|
||||||
define void @fastmathflags_unop(float %op1) {
|
define void @fastmathflags_unop(float %op1) {
|
||||||
%f.nnan = fneg nnan float %op1
|
%f.nnan = fneg nnan float %op1
|
||||||
|
|
|
@ -703,6 +703,16 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
|
||||||
|
; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
|
||||||
|
; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}}
|
||||||
|
define amdgpu_kernel void @atomic_xchg_f32_offset(float* %out, float %in) {
|
||||||
|
entry:
|
||||||
|
%gep = getelementptr float, float* %out, i32 4
|
||||||
|
%val = atomicrmw volatile xchg float* %gep, float %in seq_cst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
|
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
|
||||||
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
|
||||||
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}}
|
||||||
|
|
|
@ -650,6 +650,15 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
|
||||||
|
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
|
||||||
|
define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
|
||||||
|
entry:
|
||||||
|
%gep = getelementptr double, double* %out, i64 4
|
||||||
|
%tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
|
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
|
||||||
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
|
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
|
||||||
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
|
||||||
|
|
|
@ -839,6 +839,17 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
|
||||||
|
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
|
||||||
|
|
||||||
|
; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off offset:16{{$}}
|
||||||
|
define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
|
||||||
|
entry:
|
||||||
|
%gep = getelementptr float, float addrspace(1)* %out, i64 4
|
||||||
|
%val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
|
; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
|
||||||
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
|
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
|
||||||
; SIVI: buffer_store_dword [[RET]]
|
; SIVI: buffer_store_dword [[RET]]
|
||||||
|
|
|
@ -783,6 +783,17 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
|
||||||
|
; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
|
||||||
|
|
||||||
|
; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}, off offset:32{{$}}
|
||||||
|
define amdgpu_kernel void @atomic_xchg_f64_offset(double addrspace(1)* %out, double %in) {
|
||||||
|
entry:
|
||||||
|
%gep = getelementptr double, double addrspace(1)* %out, i64 4
|
||||||
|
%tmp0 = atomicrmw volatile xchg double addrspace(1)* %gep, double %in seq_cst
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
|
; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
|
||||||
; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
|
; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
|
||||||
; CIVI: buffer_store_dwordx2 [[RET]]
|
; CIVI: buffer_store_dwordx2 [[RET]]
|
||||||
|
|
|
@ -36,6 +36,20 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_f32_offset:
|
||||||
|
; SICIVI: s_mov_b32 m0
|
||||||
|
; GFX9-NOT: m0
|
||||||
|
|
||||||
|
; EG: LDS_WRXCHG_RET *
|
||||||
|
; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
|
||||||
|
; GCN: s_endpgm
|
||||||
|
define amdgpu_kernel void @lds_atomic_xchg_ret_f32_offset(float addrspace(1)* %out, float addrspace(3)* %ptr) nounwind {
|
||||||
|
%gep = getelementptr float, float addrspace(3)* %ptr, i32 4
|
||||||
|
%result = atomicrmw xchg float addrspace(3)* %gep, float 4.0 seq_cst
|
||||||
|
store float %result, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; XXX - Is it really necessary to load 4 into VGPR?
|
; XXX - Is it really necessary to load 4 into VGPR?
|
||||||
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
|
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
|
||||||
; EG: LDS_ADD_RET *
|
; EG: LDS_ADD_RET *
|
||||||
|
|
|
@ -27,6 +27,19 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
|
||||||
|
; SICIVI: s_mov_b32 m0
|
||||||
|
; GFX9-NOT: m0
|
||||||
|
|
||||||
|
; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
|
||||||
|
; GCN: s_endpgm
|
||||||
|
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(double addrspace(1)* %out, double addrspace(3)* %ptr) nounwind {
|
||||||
|
%gep = getelementptr double, double addrspace(3)* %ptr, i32 4
|
||||||
|
%result = atomicrmw xchg double addrspace(3)* %gep, double 4.0 seq_cst
|
||||||
|
store double %result, double addrspace(1)* %out, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
|
; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
|
||||||
; SICIVI: s_mov_b32 m0
|
; SICIVI: s_mov_b32 m0
|
||||||
; GFX9-NOT: m0
|
; GFX9-NOT: m0
|
||||||
|
|
|
@ -360,3 +360,27 @@ define void @atomic_store_relaxed(i128* %p, i128 %in) {
|
||||||
store atomic i128 %in, i128* %p unordered, align 16
|
store atomic i128 %in, i128* %p unordered, align 16
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@fsc128 = external global fp128
|
||||||
|
|
||||||
|
define void @atomic_fetch_swapf128(fp128 %x) nounwind {
|
||||||
|
; CHECK-LABEL: atomic_fetch_swapf128:
|
||||||
|
; CHECK: ## %bb.0:
|
||||||
|
; CHECK-NEXT: pushq %rbx
|
||||||
|
; CHECK-NEXT: movq %rsi, %rcx
|
||||||
|
; CHECK-NEXT: movq %rdi, %rbx
|
||||||
|
; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi
|
||||||
|
; CHECK-NEXT: movq (%rsi), %rax
|
||||||
|
; CHECK-NEXT: movq 8(%rsi), %rdx
|
||||||
|
; CHECK-NEXT: .p2align 4, 0x90
|
||||||
|
; CHECK-NEXT: LBB14_1: ## %atomicrmw.start
|
||||||
|
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||||
|
; CHECK-NEXT: lock cmpxchg16b (%rsi)
|
||||||
|
; CHECK-NEXT: jne LBB14_1
|
||||||
|
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
|
||||||
|
; CHECK-NEXT: popq %rbx
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
%t1 = atomicrmw xchg fp128* @fsc128, fp128 %x acquire
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
|
; RUN: llc < %s -O0 -mtriple=i386-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
|
||||||
|
|
||||||
@sc16 = external global i16
|
@sc16 = external global i16
|
||||||
|
@fsc16 = external global half
|
||||||
|
|
||||||
define void @atomic_fetch_add16() nounwind {
|
define void @atomic_fetch_add16() nounwind {
|
||||||
; X64-LABEL: atomic_fetch_add16
|
; X64-LABEL: atomic_fetch_add16
|
||||||
|
@ -273,3 +274,14 @@ define void @atomic_fetch_swap16(i16 %x) nounwind {
|
||||||
; X64: ret
|
; X64: ret
|
||||||
; X32: ret
|
; X32: ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @atomic_fetch_swapf16(half %x) nounwind {
|
||||||
|
%t1 = atomicrmw xchg half* @fsc16, half %x acquire
|
||||||
|
; X64-NOT: lock
|
||||||
|
; X64: xchgw
|
||||||
|
; X32-NOT: lock
|
||||||
|
; X32: xchgw
|
||||||
|
ret void
|
||||||
|
; X64: ret
|
||||||
|
; X32: ret
|
||||||
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV
|
; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV
|
||||||
|
|
||||||
@sc32 = external global i32
|
@sc32 = external global i32
|
||||||
|
@fsc32 = external global float
|
||||||
|
|
||||||
define void @atomic_fetch_add32() nounwind {
|
define void @atomic_fetch_add32() nounwind {
|
||||||
; X64-LABEL: atomic_fetch_add32:
|
; X64-LABEL: atomic_fetch_add32:
|
||||||
|
@ -708,3 +709,35 @@ define void @atomic_fetch_swap32(i32 %x) nounwind {
|
||||||
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
|
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @atomic_fetch_swapf32(float %x) nounwind {
|
||||||
|
; X64-LABEL: atomic_fetch_swapf32:
|
||||||
|
; X64: # %bb.0:
|
||||||
|
; X64-NEXT: movd %xmm0, %eax
|
||||||
|
; X64-NEXT: xchgl %eax, {{.*}}(%rip)
|
||||||
|
; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||||
|
; X64-NEXT: retq
|
||||||
|
;
|
||||||
|
; X86-CMOV-LABEL: atomic_fetch_swapf32:
|
||||||
|
; X86-CMOV: # %bb.0:
|
||||||
|
; X86-CMOV-NEXT: pushl %eax
|
||||||
|
; X86-CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
|
; X86-CMOV-NEXT: movd %xmm0, %eax
|
||||||
|
; X86-CMOV-NEXT: xchgl %eax, fsc32
|
||||||
|
; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
|
||||||
|
; X86-CMOV-NEXT: popl %eax
|
||||||
|
; X86-CMOV-NEXT: retl
|
||||||
|
;
|
||||||
|
; X86-NOCMOV-LABEL: atomic_fetch_swapf32:
|
||||||
|
; X86-NOCMOV: # %bb.0:
|
||||||
|
; X86-NOCMOV-NEXT: subl $8, %esp
|
||||||
|
; X86-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
|
||||||
|
; X86-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp)
|
||||||
|
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; X86-NOCMOV-NEXT: xchgl %eax, fsc32
|
||||||
|
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
|
||||||
|
; X86-NOCMOV-NEXT: addl $8, %esp
|
||||||
|
; X86-NOCMOV-NEXT: retl
|
||||||
|
%t1 = atomicrmw xchg float* @fsc32, float %x acquire
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
|
; RUN: llc < %s -O0 -mtriple=x86_64-- -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
|
||||||
|
|
||||||
@sc64 = external global i64
|
@sc64 = external global i64
|
||||||
|
@fsc64 = external global double
|
||||||
|
|
||||||
define void @atomic_fetch_add64() nounwind {
|
define void @atomic_fetch_add64() nounwind {
|
||||||
; X64-LABEL: atomic_fetch_add64:
|
; X64-LABEL: atomic_fetch_add64:
|
||||||
|
@ -233,3 +234,16 @@ define void @atomic_fetch_swap64(i64 %x) nounwind {
|
||||||
; X64: ret
|
; X64: ret
|
||||||
; X32: ret
|
; X32: ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @atomic_fetch_swapf64(double %x) nounwind {
|
||||||
|
; X64-LABEL: atomic_fetch_swapf64:
|
||||||
|
; X32-LABEL: atomic_fetch_swapf64:
|
||||||
|
%t1 = atomicrmw xchg double* @fsc64, double %x acquire
|
||||||
|
; X64-NOT: lock
|
||||||
|
; X64: xchgq
|
||||||
|
; X32: lock
|
||||||
|
; X32: xchg8b
|
||||||
|
ret void
|
||||||
|
; X64: ret
|
||||||
|
; X32: ret
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
|
||||||
|
|
||||||
|
define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
|
||||||
|
; CHECK-LABEL: @atomic_swap_f16(
|
||||||
|
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||||
|
; CHECK: atomicrmw.start:
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f16(half* [[PTR:%.*]])
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[VAL:%.*]] to i16
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f16(i64 [[TMP5]], half* [[PTR]])
|
||||||
|
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
|
||||||
|
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
|
||||||
|
; CHECK: atomicrmw.end:
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%t1 = atomicrmw xchg half* %ptr, half %val acquire
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
|
||||||
|
; CHECK-LABEL: @atomic_swap_f32(
|
||||||
|
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||||
|
; CHECK: atomicrmw.start:
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f32(float* [[PTR:%.*]])
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[VAL:%.*]] to i32
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f32(i64 [[TMP5]], float* [[PTR]])
|
||||||
|
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
|
||||||
|
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
|
||||||
|
; CHECK: atomicrmw.end:
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%t1 = atomicrmw xchg float* %ptr, float %val acquire
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @atomic_swap_f64(double* %ptr, double %val) nounwind {
|
||||||
|
; CHECK-LABEL: @atomic_swap_f64(
|
||||||
|
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||||
|
; CHECK: atomicrmw.start:
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f64(double* [[PTR:%.*]])
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[VAL:%.*]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0f64(i64 [[TMP3]], double* [[PTR]])
|
||||||
|
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
|
||||||
|
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
|
||||||
|
; CHECK: atomicrmw.end:
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%t1 = atomicrmw xchg double* %ptr, double %val acquire
|
||||||
|
ret void
|
||||||
|
}
|
|
@ -0,0 +1,3 @@
|
||||||
|
if not 'AArch64' in config.root.targets:
|
||||||
|
config.unsupported = True
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt -S -mtriple=i686-linux-gnu -atomic-expand %s | FileCheck %s
|
||||||
|
|
||||||
|
define double @atomic_xchg_f64(double* %ptr) nounwind {
|
||||||
|
; CHECK-LABEL: @atomic_xchg_f64(
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
|
||||||
|
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||||
|
; CHECK: atomicrmw.start:
|
||||||
|
; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
|
||||||
|
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
||||||
|
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
|
||||||
|
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
|
||||||
|
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||||
|
; CHECK: atomicrmw.end:
|
||||||
|
; CHECK-NEXT: ret double [[TMP5]]
|
||||||
|
;
|
||||||
|
%result = atomicrmw xchg double* %ptr, double 4.0 seq_cst
|
||||||
|
ret double %result
|
||||||
|
}
|
||||||
|
|
||||||
|
define double @atomic_xchg_f64_as1(double addrspace(1)* %ptr) nounwind {
|
||||||
|
; CHECK-LABEL: @atomic_xchg_f64_as1(
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
|
||||||
|
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
|
||||||
|
; CHECK: atomicrmw.start:
|
||||||
|
; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
|
||||||
|
; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
|
||||||
|
; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
|
||||||
|
; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
|
||||||
|
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
|
||||||
|
; CHECK: atomicrmw.end:
|
||||||
|
; CHECK-NEXT: ret double [[TMP5]]
|
||||||
|
;
|
||||||
|
%result = atomicrmw xchg double addrspace(1)* %ptr, double 4.0 seq_cst
|
||||||
|
ret double %result
|
||||||
|
}
|
Loading…
Reference in New Issue