forked from OSchip/llvm-project
AMDGPU: Add atomic_inc + atomic_dec intrinsics
These are different from atomicrmw add 1 because they have an additional input value to clamp the result. llvm-svn: 266074
This commit is contained in:
parent
21ecfe43ba
commit
a9dbdcae04
|
@ -151,6 +151,17 @@ def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
|
|||
[llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
|
||||
>;
|
||||
|
||||
// TODO: Do we want an ordering for these?
|
||||
def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty],
|
||||
[llvm_anyptr_ty, LLVMMatchType<0>],
|
||||
[IntrReadWriteArgMem, NoCapture<0>]
|
||||
>;
|
||||
|
||||
def int_amdgcn_atomic_dec : Intrinsic<[llvm_anyint_ty],
|
||||
[llvm_anyptr_ty, LLVMMatchType<0>],
|
||||
[IntrReadWriteArgMem, NoCapture<0>]
|
||||
>;
|
||||
|
||||
class AMDGPUImageLoad : Intrinsic <
|
||||
[llvm_v4f32_ty], // vdata(VGPR)
|
||||
[llvm_anyint_ty, // vaddr(VGPR)
|
||||
|
|
|
@ -337,7 +337,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||
return nullptr; // Already selected.
|
||||
}
|
||||
|
||||
if (isa<AtomicSDNode>(N))
|
||||
if (isa<AtomicSDNode>(N) ||
|
||||
(Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
|
||||
N = glueCopyToM0(N);
|
||||
|
||||
switch (Opc) {
|
||||
|
|
|
@ -2813,6 +2813,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(STORE_MSKOR)
|
||||
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
|
||||
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
|
||||
NODE_NAME_CASE(ATOMIC_INC)
|
||||
NODE_NAME_CASE(ATOMIC_DEC)
|
||||
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
|
||||
}
|
||||
return nullptr;
|
||||
|
|
|
@ -314,6 +314,8 @@ enum NodeType : unsigned {
|
|||
LOAD_CONSTANT,
|
||||
TBUFFER_STORE_FORMAT,
|
||||
ATOMIC_CMP_SWAP,
|
||||
ATOMIC_INC,
|
||||
ATOMIC_DEC,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
|
|
@ -333,8 +333,10 @@ class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
|
|||
>;
|
||||
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
|
||||
|
@ -344,6 +346,8 @@ def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
|
|||
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
|
||||
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
|
||||
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
|
||||
|
||||
} // End Predicates = [isCIVI]
|
||||
|
|
|
@ -128,6 +128,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
|||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
|
||||
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
|
||||
|
@ -307,6 +309,25 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
|||
// TargetLowering queries
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Describes the memory access performed by a target intrinsic call.
// For amdgcn_atomic_inc and amdgcn_atomic_dec, Info is filled in and true is
// returned; for every other intrinsic ID, false is returned and Info is not
// written.
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
const CallInst &CI,
|
||||
unsigned IntrID) const {
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec:
|
||||
// Both intrinsics share one description: a chained intrinsic node.
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
// The memory type is taken from the call's result type.
Info.memVT = MVT::getVT(CI.getType());
|
||||
// The accessed pointer is the first call operand.
Info.ptrVal = CI.getOperand(0);
|
||||
// NOTE(review): 0 presumably means no particular alignment is asserted -
// confirm against the IntrinsicInfo documentation.
Info.align = 0;
|
||||
Info.vol = false;
|
||||
// The operation both reads and writes the pointed-to memory.
Info.readMem = true;
|
||||
Info.writeMem = true;
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
|
||||
EVT) const {
|
||||
// SI has some legal vector types, but no legal vector operations. Say no
|
||||
|
@ -1173,6 +1194,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
|||
return LowerGlobalAddress(MFI, Op, DAG);
|
||||
}
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
|
||||
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
|
||||
}
|
||||
return SDValue();
|
||||
|
@ -1638,6 +1660,29 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
}
|
||||
}
|
||||
|
||||
// Custom lowering for chained intrinsic nodes.
// The intrinsic ID is read from operand 1. amdgcn_atomic_inc/dec are rewritten
// into AMDGPUISD::ATOMIC_INC / ATOMIC_DEC memory intrinsic nodes; any other
// intrinsic yields an empty SDValue.
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec: {
|
||||
MemSDNode *M = cast<MemSDNode>(Op);
|
||||
// Pick the target opcode that corresponds to the intrinsic.
unsigned Opc = (IntrID == Intrinsic::amdgcn_atomic_inc) ?
|
||||
AMDGPUISD::ATOMIC_INC : AMDGPUISD::ATOMIC_DEC;
|
||||
// Operand 1 (the intrinsic ID) is dropped; the new opcode encodes it.
SDValue Ops[] = {
|
||||
M->getOperand(0), // Chain
|
||||
M->getOperand(2), // Ptr
|
||||
M->getOperand(3) // Value
|
||||
};
|
||||
|
||||
// Reuse the original node's value types, memory type and memory operand.
return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
|
||||
M->getMemoryVT(), M->getMemOperand());
|
||||
}
|
||||
default:
|
||||
return SDValue();
|
||||
}
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
@ -2644,7 +2689,9 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case ISD::ATOMIC_LOAD_MIN:
|
||||
case ISD::ATOMIC_LOAD_MAX:
|
||||
case ISD::ATOMIC_LOAD_UMIN:
|
||||
case ISD::ATOMIC_LOAD_UMAX: { // TODO: Target mem intrinsics.
|
||||
case ISD::ATOMIC_LOAD_UMAX:
|
||||
case AMDGPUISD::ATOMIC_INC:
|
||||
case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
|
||||
if (DCI.isBeforeLegalize())
|
||||
break;
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
|
|||
MVT VT, unsigned Offset) const;
|
||||
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -66,6 +67,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
|
|||
public:
|
||||
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
|
||||
|
||||
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
|
||||
unsigned IntrinsicID) const override;
|
||||
|
||||
bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
|
||||
EVT /*VT*/) const override;
|
||||
|
||||
|
|
|
@ -95,6 +95,14 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
|
|||
[SDNPMayLoad, SDNPMemOperand]
|
||||
>;
|
||||
|
||||
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
|
||||
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
||||
>;
|
||||
|
||||
def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
|
||||
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
|
||||
>;
|
||||
|
||||
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
|
||||
SDTypeProfile<0, 13,
|
||||
[SDTCisVT<0, v4i32>, // rsrc(SGPR)
|
||||
|
@ -180,6 +188,13 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
|
|||
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
|
||||
}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PatFrags for global memory operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
|
||||
def atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
|
||||
// to be glued to the memory instructions.
|
||||
|
@ -279,9 +294,10 @@ def si_uniform_br_scc : PatFrag <
|
|||
return isCBranchSCC(N);
|
||||
}]>;
|
||||
|
||||
multiclass SIAtomicM0Glue2 <string op_name> {
|
||||
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0> {
|
||||
|
||||
def _glue : SDNode <"ISD::ATOMIC_"#op_name, SDTAtomic2,
|
||||
def _glue : SDNode <
|
||||
!if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, SDTAtomic2,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
|
||||
>;
|
||||
|
||||
|
@ -289,11 +305,13 @@ multiclass SIAtomicM0Glue2 <string op_name> {
|
|||
}
|
||||
|
||||
defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
|
||||
defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
|
||||
defm si_atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
|
||||
defm si_atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
|
||||
defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
|
||||
defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
|
||||
defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
|
||||
defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
|
||||
defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
|
||||
defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
|
||||
defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
|
||||
defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
|
||||
|
|
|
@ -1042,8 +1042,13 @@ defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
|
|||
defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
|
||||
mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
|
||||
>;
|
||||
//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
|
||||
//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
|
||||
defm BUFFER_ATOMIC_INC : MUBUF_Atomic <
|
||||
mubuf<0x3c, 0x4b>, "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global
|
||||
>;
|
||||
defm BUFFER_ATOMIC_DEC : MUBUF_Atomic <
|
||||
mubuf<0x3d, 0x4c>, "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global
|
||||
>;
|
||||
|
||||
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
|
||||
//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
|
||||
//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
|
||||
|
@ -1061,8 +1066,12 @@ defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Atomic <
|
|||
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
|
||||
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>;
|
||||
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>;
|
||||
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>;
|
||||
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>;
|
||||
defm BUFFER_ATOMIC_INC_X2 : MUBUF_Atomic <
|
||||
mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global
|
||||
>;
|
||||
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Atomic <
|
||||
mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
|
||||
>;
|
||||
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
|
||||
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
|
||||
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
|
||||
|
@ -3073,6 +3082,8 @@ class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
|
|||
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
|
||||
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
|
||||
def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
|
||||
def : DSAtomicRetPat<DS_INC_RTN_U32, i32, si_atomic_inc_local>;
|
||||
def : DSAtomicRetPat<DS_DEC_RTN_U32, i32, si_atomic_dec_local>;
|
||||
def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
|
||||
def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
|
||||
def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
|
||||
|
@ -3080,13 +3091,14 @@ def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
|
|||
def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
|
||||
def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
|
||||
def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
|
||||
|
||||
def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
|
||||
|
||||
// 64-bit atomics.
|
||||
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
|
||||
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
|
||||
def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
|
||||
def : DSAtomicRetPat<DS_INC_RTN_U64, i64, si_atomic_inc_local>;
|
||||
def : DSAtomicRetPat<DS_DEC_RTN_U64, i64, si_atomic_dec_local>;
|
||||
def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
|
||||
def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
|
||||
def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
|
||||
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
|
||||
|
||||
declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
|
||||
declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
|
||||
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
|
||||
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
|
||||
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32:
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
|
||||
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
|
||||
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
|
||||
define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
|
||||
define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
|
||||
define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset_addr64:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
|
||||
; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
|
||||
define void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_noret_i32_offset_addr64:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
|
||||
; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
|
||||
define void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
@lds0 = addrspace(3) global [512 x i32] undef
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0:
|
||||
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
||||
; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
define void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
||||
%val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
|
||||
store i32 %idx.0, i32 addrspace(1)* %add_use
|
||||
store i32 %val0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
|
||||
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
|
||||
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
|
||||
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
|
||||
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
|
||||
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
|
||||
define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
|
||||
define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
|
||||
define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
|
||||
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
|
||||
define void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
|
||||
; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
|
||||
define void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
@lds1 = addrspace(3) global [512 x i64] undef, align 8
|
||||
|
||||
; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64:
|
||||
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
|
||||
; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
|
||||
define void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
|
||||
%val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
|
||||
store i32 %idx.0, i32 addrspace(1)* %add_use
|
||||
store i64 %val0, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
attributes #2 = { nounwind argmemonly }
|
|
@ -0,0 +1,251 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32) #2
|
||||
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
|
||||
|
||||
declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64) #2
|
||||
declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64) #2
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
|
||||
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
|
||||
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
|
||||
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32:
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 42
|
||||
; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
|
||||
; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
|
||||
define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
|
||||
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
|
||||
define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
|
||||
define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
|
||||
define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset_addr64:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
|
||||
; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
|
||||
define void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
|
||||
%out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
store i32 %result, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_atomic_inc_noret_i32_offset_addr64:
|
||||
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
|
||||
; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
|
||||
; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
|
||||
define void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; 512 x i32 array in address space 3, used as the base object by
; @atomic_inc_shl_base_lds_0_i32.
@lds0 = addrspace(3) global [512 x i32] undef, align 4
|
||||
|
||||
; Indexes @lds0 at element (tid.x + 2) and performs a returning 32-bit atomic
; inc there. The checks expect the address to be formed by a shift left by 2,
; with the +2 element add folded into an 8-byte instruction offset.
; %idx.0 is also stored to %add_use, which gives the add a second use besides
; the address computation.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
|
||||
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
||||
; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
||||
define void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
||||
%val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9)
|
||||
store i32 %idx.0, i32 addrspace(1)* %add_use
|
||||
store i32 %val0, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returning 64-bit atomic inc on an addrspace(3) pointer with no offset.
; The i64 constant 42 is expected to be materialised as a low/high VGPR pair
; (42 and 0) and passed as the data operand of the 64-bit DS instruction.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
|
||||
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returning 64-bit atomic inc on an addrspace(3) pointer advanced by 4 i64
; elements. The checks expect the 32-byte displacement to appear as the DS
; instruction's immediate offset.
; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32
|
||||
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
|
||||
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Non-returning 64-bit atomic inc on an addrspace(3) pointer with no offset.
; %result is unused, so the checks expect the DS form without a destination
; register (ds_inc_u64 rather than ds_inc_rtn_u64).
; The label uses the GCN prefix, like the other checks of this function, so it
; is live whenever those checks are.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
|
||||
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Non-returning 64-bit atomic inc on an addrspace(3) pointer advanced by 4 i64
; elements. The checks expect the no-destination DS form with the 32-byte
; displacement as its immediate offset.
; The label uses the GCN prefix, like the other checks of this function, so it
; is live whenever those checks are.
; GCN-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: ds_inc_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}}
|
||||
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
|
||||
%gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returning 64-bit atomic inc on a global (addrspace 1) pointer with no
; offset. The i64 constant 42 is expected as a low/high VGPR pair (42 and 0)
; feeding the x2 buffer instruction, with glc set because the result is
; stored to %out.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
|
||||
define void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returning 64-bit atomic inc on a global pointer advanced by 4 i64 elements.
; The checks expect the 32-byte displacement as the buffer instruction's
; immediate offset, followed by glc.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
|
||||
define void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Non-returning 64-bit atomic inc on a global (addrspace 1) pointer with no
; offset. %result is unused, so the checks expect the x2 buffer instruction
; without glc.
; The label uses the GCN prefix, like the other checks of this function, so it
; is live whenever those checks are.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
define void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Non-returning 64-bit atomic inc on a global pointer advanced by 4 i64
; elements. The checks expect the 32-byte displacement as the buffer
; instruction's immediate offset and no glc, since %result is unused.
; The label uses the GCN prefix, like the other checks of this function, so it
; is live whenever those checks are.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
|
||||
define void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returning 64-bit atomic inc on a global pointer indexed by the workitem id
; plus a constant 5 elements (40 bytes). The result is stored to a
; per-workitem slot of %out. Under the CI prefix the addr64 buffer form with
; offset 40 and glc is expected, under the VI prefix the flat x2 form with glc
; and no immediate offset.
; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
|
||||
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}}
|
||||
define void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
|
||||
%out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
store i64 %result, i64 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; Non-returning counterpart of the addr64 i64 test. The address is the
; workitem id index plus 5 elements (40 bytes) and %result is unused, so the
; expected instructions carry no glc and, under the VI prefix, no destination
; register pair.
; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64:
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
|
||||
; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
|
||||
; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
|
||||
; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}}
|
||||
define void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
|
||||
%gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
|
||||
%result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
|
||||
ret void
|
||||
}
|
||||
|
||||
; 512 x i64 array in address space 3, used as the base object by
; @atomic_inc_shl_base_lds_0_i64.
@lds1 = addrspace(3) global [512 x i64] undef, align 8
|
||||
|
||||
; 64-bit variant of the shl-base test. Indexes @lds1 at element (tid.x + 2)
; and performs a returning atomic inc there. The checks expect a shift left
; by 3 for the address, with the +2 element add folded into a 16-byte
; instruction offset. %idx.0 is also stored to %add_use, which gives the add a
; second use besides the address computation.
; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64:
|
||||
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
|
||||
; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
|
||||
define void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
||||
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%idx.0 = add nsw i32 %tid.x, 2
|
||||
%arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
|
||||
%val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9)
|
||||
store i32 %idx.0, i32 addrspace(1)* %add_use
|
||||
store i64 %val0, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Attribute group applied to the test function definitions that use #0.
attributes #0 = { nounwind }
|
||||
; Attribute group used on the workitem.id.x call sites in the shl-base tests.
attributes #1 = { nounwind readnone }
|
||||
; Not referenced in the visible part of this file; presumably attached to the
; atomic intrinsic declarations (TODO confirm against the declarations).
attributes #2 = { nounwind argmemonly }
|
|
@ -324,7 +324,6 @@ define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
|
|||
ret void
|
||||
}
|
||||
|
||||
; XXX - Is it really necessary to load 4 into VGPR?
|
||||
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
|
||||
; GCN: s_load_dword [[SPTR:s[0-9]+]],
|
||||
; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
|
||||
|
|
Loading…
Reference in New Issue