[AMDGPU] Define special SGPR subregs

These are used in SReg_32 and when we start to use SGPR_LO16
there will be compaints that not all registers in RC support
all subreg indexes. For now it is NFC.

Unused regunits are reserved so that verifier does not complain
about missing phys reg live-ins.

Differential Revision: https://reviews.llvm.org/D78591
This commit is contained in:
Stanislav Mekhanoshin 2020-04-28 13:54:14 -07:00
parent 9f31446c99
commit 46a75436f8
9 changed files with 132 additions and 102 deletions

View File

@ -765,7 +765,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
}
if (AMDGPU::SReg_32RegClass.contains(Reg) ||
AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
AMDGPU::SReg_LO16RegClass.contains(Reg) ||
AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
"trap handler registers should not be used");

View File

@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
def SGPRRegBank : RegisterBank<"SGPR",
[SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
[SReg_LO16, SReg_32, SReg_64, SReg_128, SReg_160, SReg_192, SReg_256, SReg_512, SReg_1024]
>;
def VGPRRegBank : RegisterBank<"VGPR",

View File

@ -678,21 +678,20 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass ||
RC == &AMDGPU::SGPR_LO16RegClass) {
if (RI.getRegSizeInBits(*RC) == 16) {
assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
AMDGPU::SGPR_LO16RegClass.contains(SrcReg));
AMDGPU::SReg_LO16RegClass.contains(SrcReg));
bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg);
bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass ||
RC == &AMDGPU::SGPR_LO16RegClass);
bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
AMDGPU::SReg_LO16RegClass.contains(DestReg);
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::SGPR_LO16RegClass.contains(SrcReg);
const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
AMDGPU::SReg_LO16RegClass.contains(SrcReg);
const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SReg_32RegClass
: &AMDGPU::VGPR_32RegClass;
const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SReg_32RegClass
: &AMDGPU::VGPR_32RegClass;
MCRegister NewDestReg =
RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,

View File

@ -263,6 +263,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
for (auto Reg : AMDGPU::SReg_32RegClass) {
Reserved.set(getSubReg(Reg, AMDGPU::hi16));
Register Low = getSubReg(Reg, AMDGPU::lo16);
// This is to prevent BB vcc liveness errors.
if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
Reserved.set(Low);
}
// Reserve all the rest AGPRs if there are no instructions to use it.
if (!ST.hasMAIInsts()) {
for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
@ -1365,7 +1373,7 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
static const TargetRegisterClass *const BaseClasses[] = {
&AMDGPU::VGPR_LO16RegClass,
&AMDGPU::VGPR_HI16RegClass,
&AMDGPU::SGPR_LO16RegClass,
&AMDGPU::SReg_LO16RegClass,
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::AGPR_32RegClass,

View File

@ -123,25 +123,41 @@ class SIRegisterTuples<list<SubRegIndex> Indices, RegisterClass RC,
class SIReg <string n, bits<16> regIdx = 0> :
Register<n> {
let Namespace = "AMDGPU";
// This is the not yet the complete register encoding. An additional
// bit is set for VGPRs.
let HWEncoding = regIdx;
}
class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx = 0> :
class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
}
// This is the not yet the complete register encoding. An additional
// bit is set for VGPRs.
let HWEncoding = regIdx;
let CoveredBySubRegs = 1;
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
bit HWEncodingHigh = 0> {
// There is no special encoding for 16 bit subregs, these are not real
// registers but rather operands for instructions preserving other 16 bits
// of the result or reading just 16 bits of a 32 bit VGPR.
// It is encoded as a corresponding 32 bit register.
// Non-VGPR register classes use it as we need to have matching subregisters
// to move instructions and data between ALUs.
def _LO16 : SIReg<n#".l", regIdx> {
let HWEncoding{8} = HWEncodingHigh;
}
def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx> {
let isArtificial = ArtificialHigh;
let HWEncoding{8} = HWEncodingHigh;
}
def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
!cast<Register>(NAME#"_HI16")]> {
let Namespace = "AMDGPU";
let SubRegIndices = [lo16, hi16];
let CoveredBySubRegs = !if(ArtificialHigh,0,1);
let HWEncoding = regIdx;
let HWEncoding{8} = HWEncodingHigh;
}
}
// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
defm VCC_LO : SIRegLoHi16<"vcc_lo", 106>;
defm VCC_HI : SIRegLoHi16<"vcc_hi", 107>;
// Pseudo-registers: Used as placeholders during isel and immediately
// replaced, never seeing the verifier.
@ -164,8 +180,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let HWEncoding = 106;
}
def EXEC_LO : SIReg<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
def EXEC_HI : SIReg<"exec_hi", 127>;
defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
let Namespace = "AMDGPU";
@ -175,22 +191,22 @@ def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]>
// 32-bit real registers, for MC only.
// May be used with both 32-bit and 64-bit operands.
def SRC_VCCZ : SIReg<"src_vccz", 251>;
def SRC_EXECZ : SIReg<"src_execz", 252>;
def SRC_SCC : SIReg<"src_scc", 253>;
defm SRC_VCCZ : SIRegLoHi16<"src_vccz", 251>;
defm SRC_EXECZ : SIRegLoHi16<"src_execz", 252>;
defm SRC_SCC : SIRegLoHi16<"src_scc", 253>;
// 1-bit pseudo register, for codegen only.
// Should never be emitted.
def SCC : SIReg<"scc">;
def M0 : SIReg <"m0", 124>;
def SGPR_NULL : SIReg<"null", 125>;
defm M0 : SIRegLoHi16 <"m0", 124>;
defm SGPR_NULL : SIRegLoHi16 <"null", 125>;
def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
def SRC_POPS_EXITING_WAVE_ID : SIReg<"src_pops_exiting_wave_id", 239>;
defm SRC_SHARED_BASE : SIRegLoHi16<"src_shared_base", 235>;
defm SRC_SHARED_LIMIT : SIRegLoHi16<"src_shared_limit", 236>;
defm SRC_PRIVATE_BASE : SIRegLoHi16<"src_private_base", 237>;
defm SRC_PRIVATE_LIMIT : SIRegLoHi16<"src_private_limit", 238>;
defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>;
def LDS_DIRECT : SIReg <"src_lds_direct", 254> {
// There is no physical register corresponding to this. This is an
@ -199,8 +215,8 @@ def LDS_DIRECT : SIReg <"src_lds_direct", 254> {
let isArtificial = 1;
}
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
defm XNACK_MASK_LO : SIRegLoHi16<"xnack_mask_lo", 104>;
defm XNACK_MASK_HI : SIRegLoHi16<"xnack_mask_hi", 105>;
def XNACK_MASK :
RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> {
@ -210,8 +226,8 @@ def XNACK_MASK :
}
// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
defm TBA_LO : SIRegLoHi16<"tba_lo", 108>;
defm TBA_HI : SIRegLoHi16<"tba_hi", 109>;
def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
let Namespace = "AMDGPU";
@ -219,8 +235,8 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
let HWEncoding = 108;
}
def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;
defm TMA_LO : SIRegLoHi16<"tma_lo", 110>;
defm TMA_HI : SIRegLoHi16<"tma_hi", 111>;
def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
let Namespace = "AMDGPU";
@ -229,15 +245,15 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
}
foreach Index = 0-15 in {
def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
def TTMP#Index : SIReg<"ttmp"#Index, 0>;
defm TTMP#Index#_vi : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>;
defm TTMP#Index#_gfx9_gfx10 : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>;
defm TTMP#Index : SIRegLoHi16<"ttmp"#Index, 0>;
}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
def _ci : SIReg<n, ci_e>;
def _vi : SIReg<n, vi_e>;
def "" : SIReg<n, 0>;
defm _ci : SIRegLoHi16<n, ci_e>;
defm _vi : SIRegLoHi16<n, vi_e>;
defm "" : SIRegLoHi16<n, 0>;
}
class FlatReg <Register lo, Register hi, bits<16> encoding> :
@ -256,50 +272,17 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
foreach Index = 0-105 in {
def SGPR#Index#_LO16 : SIReg <"s"#Index#".l", Index>,
DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
// This is a placeholder to fill high lane in mask.
def SGPR#Index#_HI16 : SIReg <"", Index> {
let isArtificial = 1;
}
def SGPR#Index :
SIRegWithSubRegs <"s"#Index, [!cast<Register>("SGPR"#Index#"_LO16"),
!cast<Register>("SGPR"#Index#"_HI16")],
Index>,
DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]> {
let SubRegIndices = [lo16, hi16];
}
defm SGPR#Index :
SIRegLoHi16 <"s"#Index, Index>,
DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
}
// VGPR registers
foreach Index = 0-255 in {
// There is no special encoding for low 16 bit subreg, this not a real
// register but rather an operand for instructions preserving high 16 bits
// of the result or reading just low 16 bits of a 32 bit VGPR.
// It is encoded as a corresponding 32 bit register.
def VGPR#Index#_LO16 : SIReg <"v"#Index#".l", Index>,
DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
let HWEncoding{8} = 1;
}
// There is no special encoding for low 16 bit subreg, this not a real
// register but rather an operand for instructions preserving low 16 bits
// of the result or reading just high 16 bits of a 32 bit VGPR.
// It is encoded as a corresponding 32 bit register.
def VGPR#Index#_HI16 : SIReg <"v"#Index#".h", Index>,
DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
let HWEncoding{8} = 1;
}
def VGPR#Index : SIRegWithSubRegs <"v"#Index,
[!cast<Register>("VGPR"#Index#"_LO16"), !cast<Register>("VGPR"#Index#"_HI16")],
Index>,
DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]> {
let HWEncoding{8} = 1;
let SubRegIndices = [lo16, hi16];
}
defm VGPR#Index :
SIRegLoHi16 <"v"#Index, Index, 0, 1>,
DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
}
// AccVGPR registers
@ -325,11 +308,17 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
let isAllocatable = 0;
}
def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
let CopyCost = 1;
let Size = 16;
let isAllocatable = 0;
}
// TODO: Do we need to set DwarfRegAlias on register tuples?
def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "SGPR%u_LO16", 0, 105))> {
let AllocationPriority = 1;
let AllocationPriority = 9;
let Size = 16;
let GeneratePressureSet = 0;
}
@ -380,6 +369,12 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
let isAllocatable = 0;
}
def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "TTMP%u_LO16", 0, 15))> {
let Size = 16;
let isAllocatable = 0;
}
// Trap handler TMP 64-bit registers
def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
@ -588,15 +583,43 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
let AllocationPriority = 10;
}
def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
SRC_SHARED_LIMIT_LO16, SRC_PRIVATE_BASE_LO16, SRC_PRIVATE_LIMIT_LO16,
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
let AllocationPriority = 10;
}
def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 10;
}
def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> {
let Size = 16;
let AllocationPriority = 10;
}
} // End GeneratePressureSet = 0
// Register class for all scalar registers (SGPRs + Special Registers)

View File

@ -24,13 +24,13 @@ define void @nothing() #0 {
ret void
}
; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 {{$}}
; CHECK-DAG: special_regs Clobbered Registers: $scc $m0 $m0_hi16 $m0_lo16 {{$}}
define void @special_regs() #0 {
call void asm sideeffect "", "~{m0},~{scc}"() #0
ret void
}
; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo {{$}}
; CHECK-DAG: vcc Clobbered Registers: $vcc $vcc_hi $vcc_lo $vcc_hi_hi16 $vcc_hi_lo16 $vcc_lo_hi16 $vcc_lo_lo16 {{$}}
define void @vcc() #0 {
call void asm sideeffect "", "~{vcc}"() #0
ret void

View File

@ -33,7 +33,7 @@ body: |
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead [[COPY1]], 851978 /* regdef:SReg_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3
; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec

View File

@ -36,18 +36,18 @@ body: |
; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead %11
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead %11
; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %15, 851978 /* regdef:SGPR_LO16 */, def %16
; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %15, 851978 /* regdef:SReg_LO16 */, def %16
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %21, 851978 /* regdef:SGPR_LO16 */, def %22
; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %21, 851978 /* regdef:SReg_LO16 */, def %22
; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_LO16 */, %15, 851977 /* reguse:SGPR_LO16 */, %16, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_2]]
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SReg_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SReg_LO16 */, %15, 851977 /* reguse:SReg_LO16 */, %16, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_2]]
; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)

View File

@ -25,9 +25,9 @@ body: |
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]]
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]]
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def undef %0.sub0, 851978 /* regdef:SReg_LO16 */, def undef %0.sub1
; CHECK: S_NOP 0, implicit %0.sub1
; CHECK: $sgpr10 = S_MOV_B32 -1
; CHECK: S_BRANCH %bb.1
@ -63,9 +63,9 @@ body: |
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]]
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]]
; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def undef %0.sub1, 851978 /* regdef:SReg_LO16 */, def undef %0.sub0
; CHECK: S_NOP 0, implicit %0.sub1
; CHECK: $sgpr10 = S_MOV_B32 -1
; CHECK: S_BRANCH %bb.1