[AMDGPU][GFX10] Support base+soffset+offset SMEM stores.

Also makes another step towards resolving
https://github.com/llvm/llvm-project/issues/38652

Reviewed By: foad, dp

Differential Revision: https://reviews.llvm.org/D125380
This commit is contained in:
Ivan Kosarev 2022-05-12 08:25:33 +01:00
parent 82ea0d8b82
commit cb67b2ccc4
3 changed files with 83 additions and 12 deletions

View File

@ -110,10 +110,11 @@ class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag>
let has_dlc = 1;
}
class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
: SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
RegisterClass BaseClass;
RegisterClass SrcClass;
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
RegisterClass srcClass, dag ins, string asmOps>
: SM_Pseudo<opName, (outs), ins, asmOps, []> {
RegisterClass BaseClass = baseClass;
RegisterClass SrcClass = srcClass;
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
@ -177,23 +178,28 @@ multiclass SM_Pseudo_Loads<string opName,
// Defines the SMEM store pseudo instructions for `opName`, one def per
// addressing form:
//   _IMM      - $sbase plus an immediate $offset      (has_offset)
//   _SGPR     - $sbase plus an SGPR $soffset          (has_soffset)
//   _SGPR_IMM - $sbase plus both $soffset and $offset (has_soffset, has_offset)
// `baseClass` is the register class of $sbase and `srcClass` that of $sdata;
// both are also forwarded to SM_Store_Pseudo.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so superseded lines sit next to their replacements (e.g. the two
// consecutive `def _IMM` headers) -- confirm against the actual .td file.
multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
// Immediate-offset form.
def _IMM : SM_Store_Pseudo <opName,
def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
" $sdata, $sbase, $offset$cpol", []> {
" $sdata, $sbase, $offset$cpol"> {
let has_offset = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_IMM";
}
// SGPR-offset form.
def _SGPR : SM_Store_Pseudo <opName,
def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, CPol:$cpol),
" $sdata, $sbase, $soffset$cpol", []> {
" $sdata, $sbase, $soffset$cpol"> {
let has_soffset = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
}
// Combined form: takes both an SGPR $soffset and an immediate $offset.
// The asm string has no separator between $soffset and $offset; presumably
// the offset operand prints its own leading " offset:" text -- verify against
// the operand's printer.
def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass,
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soffset, i32imm:$offset,
CPol:$cpol),
" $sdata, $sbase, $soffset$offset$cpol"> {
let has_offset = 1;
let has_soffset = 1;
let PseudoInstr = opName # "_SGPR_IMM";
}
}
multiclass SM_Pseudo_Discards<string opName> {
@ -948,6 +954,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
}
def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
}
}
defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;

View File

@ -581,6 +581,9 @@ s_store_dword s1, s[4:5], null
s_store_dword s1, s[4:5], 0x0
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xfa]
s_store_dword s1, s[4:5], s0 offset:0x12345
// GFX10: encoding: [0x42,0x00,0x40,0xf4,0x45,0x23,0x01,0x00]
s_store_dword s1, s[4:5], s0 glc
// GFX10: encoding: [0x42,0x00,0x41,0xf4,0x00,0x00,0x00,0x00]
@ -593,6 +596,9 @@ s_store_dword s1, s[4:5], s0 glc dlc
s_store_dword s1, s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
s_store_dwordx2 s[2:3], s[4:5], s0
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0x00]
@ -632,6 +638,9 @@ s_store_dwordx2 s[2:3], s[4:5], null
s_store_dwordx2 s[2:3], s[4:5], 0x0
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xfa]
s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345
// GFX10: encoding: [0x82,0x00,0x44,0xf4,0x45,0x23,0x01,0x00]
s_store_dwordx2 s[2:3], s[4:5], s0 glc
// GFX10: encoding: [0x82,0x00,0x45,0xf4,0x00,0x00,0x00,0x00]
@ -644,6 +653,9 @@ s_store_dwordx2 s[2:3], s[4:5], s0 glc dlc
s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
s_store_dwordx4 s[4:7], s[4:5], s0
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0x00]
@ -680,6 +692,9 @@ s_store_dwordx4 s[4:7], s[4:5], null
s_store_dwordx4 s[4:7], s[4:5], 0x0
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xfa]
s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345
// GFX10: encoding: [0x02,0x01,0x48,0xf4,0x45,0x23,0x01,0x00]
s_store_dwordx4 s[4:7], s[4:5], s0 glc
// GFX10: encoding: [0x02,0x01,0x49,0xf4,0x00,0x00,0x00,0x00]
@ -692,6 +707,9 @@ s_store_dwordx4 s[4:7], s[4:5], s0 glc dlc
s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
s_buffer_store_dword s1, s[8:11], s0
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0x00]
@ -728,6 +746,9 @@ s_buffer_store_dword s1, s[8:11], null
s_buffer_store_dword s1, s[8:11], 0x0
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xfa]
s_buffer_store_dword s1, s[8:11], s0 offset:0x12345
// GFX10: encoding: [0x44,0x00,0x60,0xf4,0x45,0x23,0x01,0x00]
s_buffer_store_dword s1, s[8:11], s0 glc
// GFX10: encoding: [0x44,0x00,0x61,0xf4,0x00,0x00,0x00,0x00]
@ -740,6 +761,9 @@ s_buffer_store_dword s1, s[8:11], s0 glc dlc
s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
s_buffer_store_dwordx2 s[2:3], s[8:11], s0
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0x00]
@ -776,6 +800,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], null
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x0
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xfa]
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345
// GFX10: encoding: [0x84,0x00,0x64,0xf4,0x45,0x23,0x01,0x00]
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc
// GFX10: encoding: [0x84,0x00,0x65,0xf4,0x00,0x00,0x00,0x00]
@ -788,6 +815,9 @@ s_buffer_store_dwordx2 s[2:3], s[8:11], s0 glc dlc
s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
s_buffer_store_dwordx4 s[4:7], s[8:11], s0
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0x00]
@ -821,6 +851,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], null
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x0
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xfa]
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345
// GFX10: encoding: [0x04,0x01,0x68,0xf4,0x45,0x23,0x01,0x00]
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc
// GFX10: encoding: [0x04,0x01,0x69,0xf4,0x00,0x00,0x00,0x00]
@ -833,6 +866,9 @@ s_buffer_store_dwordx4 s[4:7], s[8:11], s0 glc dlc
s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc
// GFX10: encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
s_memrealtime s[10:11]
// GFX10: encoding: [0x80,0x02,0x94,0xf4,0x00,0x00,0x00,0x00]
@ -893,6 +929,9 @@ s_scratch_store_dword s101, s[4:5], s0
s_scratch_store_dword s1, s[4:5], 0x123 glc
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc
// GFX10: encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
s_scratch_store_dwordx2 s[2:3], s[4:5], s101 glc
// GFX10: encoding: [0x82,0x00,0x59,0xf4,0x00,0x00,0x00,0xca]

View File

@ -11591,6 +11591,9 @@
# GFX10: s_buffer_store_dword s1, s[8:11], 0x1234 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa]
0x44,0x40,0x61,0xf4,0x34,0x12,0x00,0xfa
# GFX10: s_buffer_store_dword s1, s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00]
0x44,0x40,0x61,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_buffer_store_dword s1, s[8:11], m0 ; encoding: [0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8]
0x44,0x00,0x60,0xf4,0x00,0x00,0x00,0xf8
@ -11639,6 +11642,9 @@
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], 0x1234 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa]
0x84,0x40,0x65,0xf4,0x34,0x12,0x00,0xfa
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00]
0x84,0x40,0x65,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_buffer_store_dwordx2 s[2:3], s[8:11], m0 ; encoding: [0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8]
0x84,0x00,0x64,0xf4,0x00,0x00,0x00,0xf8
@ -11681,6 +11687,9 @@
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], 0x1234 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa]
0x04,0x41,0x69,0xf4,0x34,0x12,0x00,0xfa
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], s0 offset:0x12345 glc dlc ; encoding: [0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00]
0x04,0x41,0x69,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_buffer_store_dwordx4 s[4:7], s[8:11], m0 ; encoding: [0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8]
0x04,0x01,0x68,0xf4,0x00,0x00,0x00,0xf8
@ -18086,6 +18095,9 @@
# GFX10: s_scratch_store_dword s1, s[4:5], 0x123 glc ; encoding: [0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa]
0x42,0x00,0x55,0xf4,0x23,0x01,0x00,0xfa
# GFX10: s_scratch_store_dword s1, s[4:5], s0 offset:0x12345 glc ; encoding: [0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00]
0x42,0x00,0x55,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_scratch_store_dword s101, s[4:5], s0 ; encoding: [0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00]
0x42,0x19,0x54,0xf4,0x00,0x00,0x00,0x00
@ -18323,6 +18335,9 @@
# GFX10: s_store_dword s1, s[4:5], 0x1234 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa]
0x42,0x40,0x41,0xf4,0x34,0x12,0x00,0xfa
# GFX10: s_store_dword s1, s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00]
0x42,0x40,0x41,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_store_dword s1, s[4:5], m0 ; encoding: [0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8]
0x42,0x00,0x40,0xf4,0x00,0x00,0x00,0xf8
@ -18374,6 +18389,9 @@
# GFX10: s_store_dwordx2 s[2:3], s[4:5], 0x1234 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa]
0x82,0x40,0x45,0xf4,0x34,0x12,0x00,0xfa
# GFX10: s_store_dwordx2 s[2:3], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00]
0x82,0x40,0x45,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_store_dwordx2 s[2:3], s[4:5], m0 ; encoding: [0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8]
0x82,0x00,0x44,0xf4,0x00,0x00,0x00,0xf8
@ -18419,6 +18437,9 @@
# GFX10: s_store_dwordx4 s[4:7], s[4:5], 0x1234 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa]
0x02,0x41,0x49,0xf4,0x34,0x12,0x00,0xfa
# GFX10: s_store_dwordx4 s[4:7], s[4:5], s0 offset:0x12345 glc dlc ; encoding: [0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00]
0x02,0x41,0x49,0xf4,0x45,0x23,0x01,0x00
# GFX10: s_store_dwordx4 s[4:7], s[4:5], m0 ; encoding: [0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8]
0x02,0x01,0x48,0xf4,0x00,0x00,0x00,0xf8