forked from OSchip/llvm-project
[AMDGPU][GFX10] Support base+soffset+offset SMEM stores.
Also makes another step towards resolving https://github.com/llvm/llvm-project/issues/38652.
Reviewed By: foad, dp
Differential Revision: https://reviews.llvm.org/D125380
This commit is contained in:
parent
82ea0d8b82
commit
cb67b2ccc4
|
@ -110,10 +110,11 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
|
|||
let has_dlc = 1;
|
||||
}
|
||||
|
||||
class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
|
||||
: SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
|
||||
RegisterClass BaseClass;
|
||||
RegisterClass SrcClass;
|
||||
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
|
||||
RegisterClass srcClass, dag ins, string asmOps>
|
||||
: SM_Pseudo<opName, (outs), ins, asmOps, []> {
|
||||
RegisterClass BaseClass = baseClass;
|
||||
RegisterClass SrcClass = srcClass;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 1;
|
||||
let has_glc = 1;
|
||||
|
@ -177,23 +178,28 @@ multiclass SM_Pseudo_Loads<string opName,
|
|||
multiclass SM_Pseudo_Stores<string opName,
|
||||
RegisterClass baseClass,
|
||||
RegisterClass srcClass> {
|
||||
def _IMM : SM_Store_Pseudo <opName,
|
||||
def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
|
||||
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
|
||||
" $sdata, $sbase, $offset$cpol", []> {
|
||||
" $sdata, $sbase, $offset$cpol"> {
|
||||
let has_offset = 1;
|
||||
let BaseClass = baseClass;
|
||||
let SrcClass = srcClass;
|
||||
let PseudoInstr = opName # "_IMM";
|
||||
}
|
||||
|
||||
def _SGPR : SM_Store_Pseudo <opName,
|
||||
def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass,
|
||||
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
|
||||
" $sdata, $sbase, $soffset$cpol", []> {
|
||||
" $sdata, $sbase, $soffset$cpol"> {
|
||||
let has_soffset = 1;
|
||||
let BaseClass = baseClass;
|
||||
let SrcClass = srcClass;
|
||||
let PseudoInstr = opName # "_SGPR";
|
||||
}
|
||||
|
||||
def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
|
||||
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, i32imm:$offset,
|
||||
CPol:$cpol),
|
||||
" $sdata, $sbase, $soffset$offset$cpol"> {
|
||||
let has_offset = 1;
|
||||
let has_soffset = 1;
|
||||
let PseudoInstr = opName # "_SGPR_IMM";
|
||||
}
|
||||
}
|
||||
|
||||
multiclass SM_Pseudo_Discards<string opName> {
|
||||
|
@ -948,6 +954,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
|
|||
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
|
||||
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
|
||||
}
|
||||
|
||||
def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
|
||||
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
|
||||
SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
|
||||
}
|
||||
}
|
||||
|
||||
defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
|
||||
|
|
|
@ -581,6 +581,9 @@ s_store_dword s1, s[4:5], null
|
|||
s_store_dword s1, s[4:5], 0x0
|
||||
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa]
|
||||
|
||||
s_store_dword s1, s[4:5], s0 offset:0x12345
|
||||
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_store_dword s1, s[4:5], s0 glc
|
||||
// GFX10: encoding: [0x42,0x00,0x41,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -593,6 +596,9 @@ s_store_dword s1, s[4:5], s0 glc dlc
|
|||
s_store_dword s1, s[4:5], 0x1234 glc dlc
|
||||
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
|
||||
|
||||
s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc
|
||||
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_store_dwordx2 s[2:3], s[4:5], s0
|
||||
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -632,6 +638,9 @@ s_store_dwordx2 s[2:3], s[4:5], null
|
|||
s_store_dwordx2 s[2:3], s[4:5], 0x0
|
||||
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa]
|
||||
|
||||
s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345
|
||||
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_store_dwordx2 s[2:3], s[4:5], s0 glc
|
||||
// GFX10: encoding: [0x82,0x00,0x45,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -644,6 +653,9 @@ s_store_dwordx2 s[2:3], s[4:5], s0 glc dlc
|
|||
s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc
|
||||
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
|
||||
|
||||
s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc
|
||||
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_store_dwordx4 s[4:7], s[4:5], s0
|
||||
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -680,6 +692,9 @@ s_store_dwordx4 s[4:7], s[4:5], null
|
|||
s_store_dwordx4 s[4:7], s[4:5], 0x0
|
||||
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa]
|
||||
|
||||
s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345
|
||||
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_store_dwordx4 s[4:7], s[4:5], s0 glc
|
||||
// GFX10: encoding: [0x02,0x01,0x49,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -692,6 +707,9 @@ s_store_dwordx4 s[4:7], s[4:5], s0 glc dlc
|
|||
s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc
|
||||
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
|
||||
|
||||
s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc
|
||||
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_buffer_store_dword s1, s[8:11], s0
|
||||
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -728,6 +746,9 @@ s_buffer_store_dword s1, s[8:11], null
|
|||
s_buffer_store_dword s1, s[8:11], 0x0
|
||||
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa]
|
||||
|
||||
s_buffer_store_dword s1, s[8:11], s0 offset:0x12345
|
||||
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_buffer_store_dword s1, s[8:11], s0 glc
|
||||
// GFX10: encoding: [0x44,0x00,0x61,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -740,6 +761,9 @@ s_buffer_store_dword s1, s[8:11], s0 glc dlc
|
|||
s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc
|
||||
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
|
||||
|
||||
s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc
|
||||
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_buffer_store_dwordx2 s[2:3], s[8:11], s0
|
||||
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -776,6 +800,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], null
|
|||
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0
|
||||
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa]
|
||||
|
||||
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345
|
||||
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc
|
||||
// GFX10: encoding: [0x84,0x00,0x65,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -788,6 +815,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc dlc
|
|||
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc
|
||||
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
|
||||
|
||||
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc
|
||||
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_buffer_store_dwordx4 s[4:7], s[8:11], s0
|
||||
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -821,6 +851,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], null
|
|||
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0
|
||||
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa]
|
||||
|
||||
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345
|
||||
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc
|
||||
// GFX10: encoding: [0x04,0x01,0x69,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -833,6 +866,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc dlc
|
|||
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc
|
||||
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
|
||||
|
||||
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc
|
||||
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_memrealtime s[10:11]
|
||||
// GFX10: encoding: [0x80,0x02,0x94,0xf4,0x00,0x00,0x00,0x00]
|
||||
|
||||
|
@ -893,6 +929,9 @@ s_scratch_store_dword s101, s[4:5], s0
|
|||
s_scratch_store_dword s1, s[4:5], 0x123 glc
|
||||
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
|
||||
|
||||
s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc
|
||||
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
|
||||
|
||||
s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
|
||||
// GFX10: encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca]
|
||||
|
||||
|
|
|
@ -11591,6 +11591,9 @@
|
|||
# GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
|
||||
0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa
|
||||
|
||||
# GFX10: s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_buffer_store_dword s1, s[8:11], m0 ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8]
|
||||
0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8
|
||||
|
||||
|
@ -11639,6 +11642,9 @@
|
|||
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
|
||||
0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa
|
||||
|
||||
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], m0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8]
|
||||
0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8
|
||||
|
||||
|
@ -11681,6 +11687,9 @@
|
|||
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
|
||||
0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa
|
||||
|
||||
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], m0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8]
|
||||
0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8
|
||||
|
||||
|
@ -18086,6 +18095,9 @@
|
|||
# GFX10: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
|
||||
0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa
|
||||
|
||||
# GFX10: s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc ; encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00]
|
||||
0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00
|
||||
|
||||
|
@ -18323,6 +18335,9 @@
|
|||
# GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
|
||||
0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa
|
||||
|
||||
# GFX10: s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8]
|
||||
0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8
|
||||
|
||||
|
@ -18374,6 +18389,9 @@
|
|||
# GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
|
||||
0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa
|
||||
|
||||
# GFX10: s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_store_dwordx2 s[2:3], s[4:5], m0 ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8]
|
||||
0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8
|
||||
|
||||
|
@ -18419,6 +18437,9 @@
|
|||
# GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
|
||||
0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa
|
||||
|
||||
# GFX10: s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
|
||||
0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00
|
||||
|
||||
# GFX10: s_store_dwordx4 s[4:7], s[4:5], m0 ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8]
|
||||
0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8
|
||||
|
||||
|
|
Loading…
Reference in New Issue