forked from OSchip/llvm-project
[AMDGPU] Allow no saddr for global addtid insts
I think the global_load_dword_addtid and global_store_dword_addtid instructions support switching off the scalar address (written as `off` in place of the saddr operand). Add assembler and disassembler support for this form. Differential Revision: https://reviews.llvm.org/D93288
This commit is contained in:
parent
741978d727
commit
409a2f0f9e
|
@ -192,24 +192,34 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
|
|||
}
|
||||
|
||||
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
|
||||
bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
|
||||
bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
|
||||
opName,
|
||||
(outs regClass:$vdst),
|
||||
!con((ins SReg_64:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
|
||||
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
|
||||
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
|
||||
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
|
||||
" $vdst, $saddr$offset$glc$slc$dlc"> {
|
||||
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
|
||||
let is_flat_global = 1;
|
||||
let has_data = 0;
|
||||
let mayLoad = 1;
|
||||
let has_vaddr = 0;
|
||||
let has_saddr = 1;
|
||||
let enabled_saddr = 1;
|
||||
let enabled_saddr = EnableSaddr;
|
||||
let maybeAtomic = 1;
|
||||
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
|
||||
|
||||
let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
|
||||
let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
|
||||
}
|
||||
|
||||
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
|
||||
bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
|
||||
def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
|
||||
GlobalSaddrTable<0, opName>;
|
||||
def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
|
||||
GlobalSaddrTable<1, opName>;
|
||||
}
|
||||
|
||||
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
|
||||
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
|
||||
def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
|
||||
|
@ -220,21 +230,29 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
|
|||
}
|
||||
|
||||
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
|
||||
bit HasSignedOffset = 0> : FLAT_Pseudo<
|
||||
bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
|
||||
opName,
|
||||
(outs),
|
||||
!con(
|
||||
(ins vdataClass:$vdata, SReg_64:$saddr),
|
||||
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
|
||||
" $vdata, $saddr$offset$glc$slc$dlc"> {
|
||||
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
|
||||
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
|
||||
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
|
||||
let is_flat_global = 1;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 1;
|
||||
let has_vdst = 0;
|
||||
let has_vaddr = 0;
|
||||
let has_saddr = 1;
|
||||
let enabled_saddr = 1;
|
||||
let enabled_saddr = EnableSaddr;
|
||||
let maybeAtomic = 1;
|
||||
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
|
||||
}
|
||||
|
||||
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
|
||||
bit HasSignedOffset = 0> {
|
||||
def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
|
||||
GlobalSaddrTable<0, opName>;
|
||||
def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
|
||||
GlobalSaddrTable<1, opName>;
|
||||
}
|
||||
|
||||
class FlatScratchInst <string sv_op, string mode> {
|
||||
|
@ -603,7 +621,7 @@ defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_
|
|||
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
|
||||
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
|
||||
let OtherPredicates = [HasGFX10_BEncoding] in
|
||||
def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
|
||||
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
|
||||
|
||||
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
|
||||
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
|
||||
|
@ -612,7 +630,7 @@ defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VR
|
|||
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
|
||||
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
|
||||
let OtherPredicates = [HasGFX10_BEncoding] in
|
||||
def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
|
||||
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
|
||||
|
||||
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
|
||||
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
|
||||
|
@ -1651,8 +1669,8 @@ defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>;
|
|||
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
|
||||
defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>;
|
||||
defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>;
|
||||
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x016>;
|
||||
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;
|
||||
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>;
|
||||
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;
|
||||
|
||||
// ENC_FLAT_SCRATCH.
|
||||
defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
|
||||
|
|
|
@ -12,6 +12,15 @@ global_load_dword_addtid v1, s[2:3] offset:16 glc slc dlc
|
|||
global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
|
||||
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00]
|
||||
|
||||
global_load_dword_addtid v1, off offset:16
|
||||
// GFX10: encoding: [0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01]
|
||||
|
||||
global_load_dword_addtid v1, off offset:16 glc slc dlc
|
||||
// GFX10: encoding: [0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01]
|
||||
|
||||
global_store_dword_addtid v1, off offset:16 glc slc dlc
|
||||
// GFX10: encoding: [0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00]
|
||||
|
||||
global_store_dword v254, v1, s[2:3] offset:16
|
||||
// GFX10: encoding: [0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00]
|
||||
|
||||
|
|
|
@ -12,6 +12,15 @@
|
|||
# GFX10: global_store_dword_addtid v1, s[2:3] offset:16 glc slc dlc
|
||||
0x10,0x90,0x5f,0xdc,0x00,0x01,0x02,0x00
|
||||
|
||||
# GFX10: global_load_dword_addtid v1, off offset:16
|
||||
0x10,0x80,0x58,0xdc,0x00,0x00,0x7d,0x01
|
||||
|
||||
# GFX10: global_load_dword_addtid v1, off offset:16 glc slc dlc
|
||||
0x10,0x90,0x5b,0xdc,0x00,0x00,0x7d,0x01
|
||||
|
||||
# GFX10: global_store_dword_addtid v1, off offset:16 glc slc dlc
|
||||
0x10,0x90,0x5f,0xdc,0x00,0x01,0x7d,0x00
|
||||
|
||||
# GFX10: global_store_dword v254, v1, s[2:3] offset:16
|
||||
0x10,0x80,0x70,0xdc,0xfe,0x01,0x02,0x00
|
||||
|
||||
|
|
Loading…
Reference in New Issue