AMDGPU/SI: Implement atomic load/store for i32 and i64

Standard load/store instructions with the GLC bit set.

Reviewers: tstellardAMD, arsenm

Differential Revision: http://reviews.llvm.org/D18760

llvm-svn: 265709
commit 43b7b5b846 (parent 9112758077)
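As a quick illustration (mirroring the atomic_load_i32 test added below), a sequentially consistent atomic load from the global address space now selects as an ordinary load with the GLC bit set: buffer_load_dword ... glc on SI, flat_load_dword ... glc on VI.

; Minimal IR sketch, adapted from the new global_atomics tests below.
define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}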
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -81,16 +81,16 @@ private:
   static bool checkType(const Value *ptr, unsigned int addrspace);
   static bool checkPrivateAddress(const MachineMemOperand *Op);
 
-  static bool isGlobalStore(const StoreSDNode *N);
-  static bool isFlatStore(const StoreSDNode *N);
+  static bool isGlobalStore(const MemSDNode *N);
+  static bool isFlatStore(const MemSDNode *N);
   static bool isPrivateStore(const StoreSDNode *N);
   static bool isLocalStore(const StoreSDNode *N);
   static bool isRegionStore(const StoreSDNode *N);
 
   bool isCPLoad(const LoadSDNode *N) const;
-  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
-  bool isGlobalLoad(const LoadSDNode *N) const;
-  bool isFlatLoad(const LoadSDNode *N) const;
+  bool isConstantLoad(const MemSDNode *N, int cbID) const;
+  bool isGlobalLoad(const MemSDNode *N) const;
+  bool isFlatLoad(const MemSDNode *N) const;
   bool isParamLoad(const LoadSDNode *N) const;
   bool isPrivateLoad(const LoadSDNode *N) const;
   bool isLocalLoad(const LoadSDNode *N) const;
@@ -128,6 +128,8 @@ private:
                          SDValue &TFE) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &GLC) const;
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset) const;
   void SelectMUBUFConstant(SDValue Constant,
                            SDValue &SOffset,
                            SDValue &ImmOffset) const;
@@ -558,7 +560,9 @@ bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
   return false;
 }
 
-bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
+  if (!N->writeMem())
+    return false;
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
 }
 
@@ -573,7 +577,9 @@ bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 }
 
-bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
+  if (!N->writeMem())
+    return false;
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
 }
 
@@ -581,7 +587,9 @@ bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
 }
 
-bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
+bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+  if (!N->readMem())
+    return false;
   const Value *MemVal = N->getMemOperand()->getValue();
   if (CbId == -1)
     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
@@ -589,7 +597,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
 }
 
-bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
+  if (!N->readMem())
+    return false;
   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
         N->getMemoryVT().bitsLT(MVT::i32))
@@ -606,7 +616,9 @@ bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 }
 
-bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
+  if (!N->readMem())
+    return false;
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
 }
 
@@ -955,8 +967,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
 
   SDLoc DL(Addr);
 
-  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
-  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  if (!GLC.getNode())
+    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  if (!SLC.getNode())
+    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1112,6 +1126,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   return false;
 }
 
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &Soffset, SDValue &Offset
+                                           ) const {
+  SDValue GLC, SLC, TFE;
+
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset,
                                            SDValue &GLC) const {
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -192,6 +192,11 @@ def global_store : PatFrag<(ops node:$val, node:$ptr),
   return isGlobalStore(dyn_cast<StoreSDNode>(N));
 }]>;
 
+def global_store_atomic : PatFrag<(ops node:$val, node:$ptr),
+                                  (atomic_store node:$val, node:$ptr), [{
+  return isGlobalStore(dyn_cast<MemSDNode>(N));
+}]>;
+
 // Global address space loads
 def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
--- a/lib/Target/AMDGPU/CIInstructions.td
+++ b/lib/Target/AMDGPU/CIInstructions.td
@@ -289,6 +289,11 @@ class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
   (inst $addr, 0, 0, 0)
 >;
 
+class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+  (vt (node i64:$addr)),
+  (inst $addr, 1, 0, 0)
+>;
+
 def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
 def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
 def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
@@ -297,17 +302,30 @@ def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
 def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
 
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
+
+
 class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
   (node vt:$data, i64:$addr),
   (inst $addr, $data, 0, 0, 0)
 >;
 
+class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+  // atomic store follows atomic binop convention so the address comes first
+  (node i64:$addr, vt:$data),
+  (inst $addr, $data, 1, 0, 0)
+>;
+
 def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
 def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
 def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
 def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
 def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
 
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
+
 class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
                      ValueType data_vt = vt> : Pat <
   (vt (node i64:$addr, data_vt:$data)),
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -140,12 +140,13 @@ def SIconstdata_ptr : SDNode<
 
 class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
                                                (ld node:$ptr), [{
-  return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
-         isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
-         isConstantLoad(cast<LoadSDNode>(N), -1);
+  return isFlatLoad(dyn_cast<MemSDNode>(N)) ||
+         isGlobalLoad(dyn_cast<MemSDNode>(N)) ||
+         isConstantLoad(cast<MemSDNode>(N), -1);
 }]>;
 
 def flat_load : flat_ld <load>;
+def atomic_flat_load : flat_ld<atomic_load>;
 def flat_az_extloadi8 : flat_ld <az_extloadi8>;
 def flat_sextloadi8 : flat_ld <sextloadi8>;
 def flat_az_extloadi16 : flat_ld <az_extloadi16>;
@@ -153,11 +154,12 @@ def flat_sextloadi16 : flat_ld <sextloadi16>;
 
 class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
                                                (st node:$val, node:$ptr), [{
-  return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
-         isGlobalStore(dyn_cast<StoreSDNode>(N));
+  return isFlatStore(dyn_cast<MemSDNode>(N)) ||
+         isGlobalStore(dyn_cast<MemSDNode>(N));
 }]>;
 
 def flat_store: flat_st <store>;
+def atomic_flat_store: flat_st <atomic_store>;
 def flat_truncstorei8 : flat_st <truncstorei8>;
 def flat_truncstorei16 : flat_st <truncstorei16>;
 
@@ -167,6 +169,12 @@ def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
          isConstantLoad(cast<LoadSDNode>(N), -1);
 }]>;
 
+def mubuf_load_atomic : PatFrag <(ops node:$ptr), (atomic_load node:$ptr), [{
+  return isGlobalLoad(cast<MemSDNode>(N)) ||
+         isConstantLoad(cast<MemSDNode>(N), -1);
+}]>;
+
+
 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
   return isConstantLoad(cast<LoadSDNode>(N), -1) &&
   static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
@@ -721,6 +729,7 @@ def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
 def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
 def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
 def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
 def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
 def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -3102,20 +3102,35 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
 // MUBUF Patterns
 //===----------------------------------------------------------------------===//
 
-multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
-                              PatFrag constant_ld> {
-  def : Pat <
+class MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
+                         PatFrag constant_ld> : Pat <
     (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
   >;
-}
 
+multiclass MUBUFLoad_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
+                                     ValueType vt, PatFrag atomic_ld> {
+  def : Pat <
+    (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+                                i16:$offset, i1:$slc))),
+    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+  >;
+
+  def : Pat <
+    (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+  >;
+}
+
 let Predicates = [isSICI] in {
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
+def : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
 } // End Predicates = [isSICI]
 
 class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
@@ -3176,6 +3191,25 @@ defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_
 defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
                          BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
 
+multiclass MUBUFStore_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
+                                      ValueType vt, PatFrag atomic_st> {
+  // Store follows atomic op convention so address is first
+  def : Pat <
+    (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+                            i16:$offset, i1:$slc), vt:$val),
+    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+  >;
+
+  def : Pat <
+    (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+  >;
+}
+let Predicates = [isSICI] in {
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, global_store_atomic>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
+} // End Predicates = [isSICI]
+
 class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
   (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
                                u16imm:$offset)),
--- a/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/test/CodeGen/AMDGPU/global_atomics.ll
@@ -930,3 +930,181 @@ entry:
   store i32 %0, i32 addrspace(1)* %out2
   ret void
 }
+
+; ATOMIC_LOAD
+; FUNC-LABEL: {{^}}atomic_load_i32_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 4
+  %0 = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32:
+; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+entry:
+  %0 = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32_addr64_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
+  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+  %0 = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32_addr64:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
+  %0 = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_offset:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4
+  %0 = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
+entry:
+  %0 = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_addr64_offset:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
+  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
+  %0 = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_addr64:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
+  %0 = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; ATOMIC_STORE
+; FUNC-LABEL: {{^}}atomic_store_i32_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32:
+; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
+entry:
+  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_offset:
+; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
+  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
+define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
+entry:
+  store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_addr64_offset:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
+  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_addr64:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
+  store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
+  ret void
+}