forked from OSchip/llvm-project
AMDGPU: Select scratch mubuf offsets when pointer is a constant
In call sequence setups, there may not be a frame index base and the pointer is a constant offset from the frame pointer / scratch wave offset register. llvm-svn: 301230
This commit is contained in:
parent
df6539f44b
commit
0774ea267a
|
@ -116,8 +116,11 @@ private:
|
|||
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
||||
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
|
||||
SDValue &SLC) const;
|
||||
bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
|
||||
SDValue &SOffset, SDValue &ImmOffset) const;
|
||||
bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
|
||||
SDValue &SOffset, SDValue &ImmOffset) const;
|
||||
bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
|
||||
SDValue &Offset) const;
|
||||
|
||||
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
|
||||
SDValue &Offset, SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE) const;
|
||||
|
@ -953,8 +956,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool isLegalMUBUFImmOffset(unsigned Imm) {
|
||||
return isUInt<12>(Imm);
|
||||
}
|
||||
|
||||
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
|
||||
return isUInt<12>(Imm->getZExtValue());
|
||||
return isLegalMUBUFImmOffset(Imm->getZExtValue());
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
||||
|
@ -1076,9 +1083,9 @@ SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
|
|||
return N;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
|
||||
SDValue &VAddr, SDValue &SOffset,
|
||||
SDValue &ImmOffset) const {
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
|
||||
SDValue &VAddr, SDValue &SOffset,
|
||||
SDValue &ImmOffset) const {
|
||||
|
||||
SDLoc DL(Addr);
|
||||
MachineFunction &MF = CurDAG->getMachineFunction();
|
||||
|
@ -1087,8 +1094,22 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
|
|||
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
|
||||
SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
|
||||
|
||||
// (add n0, c1)
|
||||
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
|
||||
unsigned Imm = CAddr->getZExtValue();
|
||||
assert(!isLegalMUBUFImmOffset(Imm) &&
|
||||
"should have been selected by other pattern");
|
||||
|
||||
SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
|
||||
MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
|
||||
DL, MVT::i32, HighBits);
|
||||
VAddr = SDValue(MovHighBits, 0);
|
||||
ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||
// (add n0, c1)
|
||||
|
||||
SDValue N0 = Addr.getOperand(0);
|
||||
SDValue N1 = Addr.getOperand(1);
|
||||
|
||||
|
@ -1107,6 +1128,24 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Match a scratch (private) access whose address is a plain constant that
// fits entirely in the MUBUF immediate offset field, so no VGPR address
// ("offen") is needed: the scratch rsrc + wave offset registers plus the
// immediate fully describe the location.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  // Only a bare constant address qualifies for this form.
  const auto *ConstAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!ConstAddr)
    return false;

  // The whole constant must be encodable in the 12-bit immediate field.
  if (!isLegalMUBUFImmOffset(ConstAddr))
    return false;

  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc SL(Addr);

  // Base resource descriptor and per-wave scratch offset come from fixed
  // registers reserved by the function info.
  SRsrc = CurDAG->getRegister(MFI->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(MFI->getScratchWaveOffsetReg(), MVT::i32);
  Offset = CurDAG->getTargetConstant(ConstAddr->getZExtValue(), SL, MVT::i16);
  return true;
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
|
||||
SDValue &SOffset, SDValue &Offset,
|
||||
SDValue &GLC, SDValue &SLC,
|
||||
|
|
|
@ -11,7 +11,9 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
|
|||
def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
|
||||
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
|
||||
|
||||
// Scratch (private) address matching: "Offen" uses a VGPR address register,
// "Offset" folds a constant address entirely into the immediate offset field.
// (The old combined MUBUFScratch pattern was split into these two; its
// selector SelectMUBUFScratch no longer exists.)
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen">;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [], 20>;
|
||||
|
||||
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
|
||||
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
|
||||
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
|
||||
|
@ -958,21 +960,30 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
|
|||
|
||||
} // End Predicates = [Has16BitInsts]
|
||||
|
||||
// Scratch load selection: emit a pattern for each addressing form of the
// same load — OFFEN when a VGPR address is required, OFFSET when the whole
// address folds into the instruction's immediate offset.
multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
                                MUBUF_Pseudo InstrOffset,
                                ValueType vt, PatFrag ld> {
  def : Pat <
    (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                               i32:$soffset, u16imm:$offset))),
    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
  >;

  def : Pat <
    (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
  >;
}
|
||||
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
|
||||
defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
|
||||
|
||||
// BUFFER_LOAD_DWORD*, addr64=0
|
||||
multiclass MUBUF_Load_Dword <ValueType vt,
|
||||
|
@ -1054,19 +1065,29 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
|
|||
defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
|
||||
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>;
|
||||
|
||||
// Scratch store selection: mirror of MUBUFScratchLoadPat — one pattern for
// the OFFEN (VGPR address) form and one for the OFFSET (constant address
// folded into the immediate) form of the same store.
multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
                                 MUBUF_Pseudo InstrOffset,
                                 ValueType vt, PatFrag st> {
  def : Pat <
    (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                      i32:$soffset, u16imm:$offset)),
    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
  >;

  def : Pat <
    (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
                                       u16imm:$offset)),
    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
  >;
}
|
||||
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF Patterns
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s
|
||||
|
||||
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
|
||||
; HSA: enable_sgpr_private_segment_buffer = 1
|
||||
|
@ -223,9 +223,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
|
|||
}
|
||||
|
||||
; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
|
||||
; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
|
||||
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
|
||||
; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
|
||||
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
|
||||
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
|
||||
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
|
||||
%cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
|
||||
store volatile i32 7, i32* %cast
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
|
||||
; Test addressing modes when the scratch base is not a frame index.
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_i8:
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @store_private_offset_i8() #0 {
|
||||
store volatile i8 5, i8* inttoptr (i32 8 to i8*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_i16:
|
||||
; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @store_private_offset_i16() #0 {
|
||||
store volatile i16 5, i16* inttoptr (i32 8 to i16*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_i32:
|
||||
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @store_private_offset_i32() #0 {
|
||||
store volatile i32 5, i32* inttoptr (i32 8 to i32*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_v2i32:
|
||||
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
|
||||
store volatile <2 x i32> <i32 5, i32 10>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_v4i32:
|
||||
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
|
||||
store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_private_offset_i8:
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @load_private_offset_i8() #0 {
|
||||
%load = load volatile i8, i8* inttoptr (i32 8 to i8*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sextload_private_offset_i8:
|
||||
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
|
||||
%load = load volatile i8, i8* inttoptr (i32 8 to i8*)
|
||||
%sextload = sext i8 %load to i32
|
||||
store i32 %sextload, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}zextload_private_offset_i8:
|
||||
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
|
||||
%load = load volatile i8, i8* inttoptr (i32 8 to i8*)
|
||||
%zextload = zext i8 %load to i32
|
||||
store i32 %zextload, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_private_offset_i16:
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @load_private_offset_i16() #0 {
|
||||
%load = load volatile i16, i16* inttoptr (i32 8 to i16*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sextload_private_offset_i16:
|
||||
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
|
||||
%load = load volatile i16, i16* inttoptr (i32 8 to i16*)
|
||||
%sextload = sext i16 %load to i32
|
||||
store i32 %sextload, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}zextload_private_offset_i16:
|
||||
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
|
||||
%load = load volatile i16, i16* inttoptr (i32 8 to i16*)
|
||||
%zextload = zext i16 %load to i32
|
||||
store i32 %zextload, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_private_offset_i32:
|
||||
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @load_private_offset_i32() #0 {
|
||||
%load = load volatile i32, i32* inttoptr (i32 8 to i32*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_private_offset_v2i32:
|
||||
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
|
||||
%load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}load_private_offset_v4i32:
|
||||
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8
|
||||
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
|
||||
%load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:4095
|
||||
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
|
||||
store volatile i8 5, i8* inttoptr (i32 4095 to i8*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
|
||||
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen{{$}}
|
||||
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
|
||||
store volatile i8 5, i8* inttoptr (i32 4096 to i8*)
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
|
||||
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
|
||||
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen offset:1{{$}}
|
||||
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
|
||||
store volatile i8 5, i8* inttoptr (i32 4097 to i8*)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=OPTNONE %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s
|
||||
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=OPTNONE %s
|
||||
|
||||
; There are no stack objects, but still a private memory access. The
|
||||
; private access registers need to be correctly initialized anyway, and
|
||||
|
@ -27,9 +27,9 @@ define amdgpu_kernel void @store_to_undef() #0 {
|
|||
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
|
||||
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
|
||||
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
|
||||
; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
|
||||
; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
|
||||
; Store through a constant (non-frame-index) private address; the CHECK above
; expects the address to fold into the immediate field ("off ... offset:124").
; The duplicate store to offset 123 was stale diff residue from the pre-change
; version of this test and contradicted the single CHECK line.
define amdgpu_kernel void @store_to_inttoptr() #0 {
  store volatile i32 0, i32* inttoptr (i32 124 to i32*)
  ret void
}
|
||||
|
||||
|
@ -47,9 +47,9 @@ define amdgpu_kernel void @load_from_undef() #0 {
|
|||
; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1]
|
||||
; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3]
|
||||
; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}}
|
||||
; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}}
|
||||
; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}}
|
||||
; Load through a constant (non-frame-index) private address; the CHECK above
; expects "off ... offset:124". The original text defined %ld twice, which is
; invalid SSA (each LLVM-IR value name may be assigned exactly once) — the
; offset-123 line was stale diff residue and is removed.
define amdgpu_kernel void @load_from_inttoptr() #0 {
  %ld = load volatile i32, i32* inttoptr (i32 124 to i32*)
  ret void
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue