[AMDGPU] Avoid inserting noops during scheduling

Passes that are run after the post-RA scheduler may insert instructions like
waitcnt which eliminate the need for certain noops. After this patch the
scheduler is still aware of possible latency from hazards but noops will
not be inserted until the dedicated hazard recognizer pass is run.

Depends on D89753.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D89754
This commit is contained in:
Austin Kerbow 2020-10-19 16:54:24 -07:00
parent 37d907899f
commit ebdcef20ce
12 changed files with 70 additions and 266 deletions

View File

@ -138,67 +138,71 @@ static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
// If we are not in "HazardRecognizerMode" and therefore not being run from
// the scheduler, track possible stalls from hazards but don't insert noops.
auto HazardType = IsHazardRecognizerMode ? NoopHazard : Hazard;
if (MI->isBundle())
return NoHazard;
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return NoopHazard;
return HazardType;
// FIXME: Should flat be considered vmem?
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI))
&& checkVMEMHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
return NoopHazard;
return HazardType;
if (checkFPAtomicToDenormModeHazard(MI) > 0)
return NoopHazard;
return HazardType;
if (ST.hasNoDataDepHazard())
return NoHazard;
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (ST.hasReadM0MovRelInterpHazard() &&
(TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
return HazardType;
if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
return HazardType;
if (SIInstrInfo::isMAI(*MI) && checkMAIHazards(MI) > 0)
return NoopHazard;
return HazardType;
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
return NoopHazard;
return HazardType;
if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0)
return NoopHazard;
return HazardType;
return NoHazard;
}
@ -312,15 +316,19 @@ void GCNHazardRecognizer::EmitNoop() {
void GCNHazardRecognizer::AdvanceCycle() {
// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
if (!CurrCycleInstr)
if (!CurrCycleInstr) {
EmittedInstrs.push_front(nullptr);
return;
}
// Do not track non-instructions which do not affect the wait states.
// If included, these instructions can lead to buffer overflow such that
// detectable hazards are missed.
if (CurrCycleInstr->isImplicitDef() || CurrCycleInstr->isDebugInstr() ||
CurrCycleInstr->isKill())
CurrCycleInstr->isKill()) {
CurrCycleInstr = nullptr;
return;
}
if (CurrCycleInstr->isBundle()) {
processBundle();

View File

@ -602,35 +602,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
// Combine adjacent s_nops to use the immediate operand encoding how long
// to wait.
//
// s_nop N
// s_nop M
// =>
// s_nop (N + M)
if (MI.getOpcode() == AMDGPU::S_NOP &&
MI.getNumOperands() == 1 && // Don't merge with implicit operands
Next != MBB.end() &&
(*Next).getOpcode() == AMDGPU::S_NOP &&
(*Next).getNumOperands() == 1) {
MachineInstr &NextMI = *Next;
// The instruction encodes the amount to wait with an offset of 1,
// i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
// after adding.
uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
// Make sure we don't overflow the bounds.
if (Nop0 + Nop1 <= 8) {
NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
MI.eraseFromParent();
}
continue;
}
// FIXME: We also need to consider movs of constant operands since
// immediate operands are not folded if they have more than one use, and
// the operand folding pass is unaware if the immediate will be free since

View File

@ -8,7 +8,6 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace(
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0
; GFX9-NEXT: s_lshl_b32 m0, s4, 1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_movrels_b64 s[0:1], s[8:9]
; GFX9-NEXT: s_movrels_b64 s[2:3], s[10:11]

View File

@ -887,8 +887,8 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2
; GFX8-NEXT: s_and_b64 vcc, vcc, s[2:3]
; GFX8-NEXT: s_nop 1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
@ -992,8 +992,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
; GFX7-NEXT: s_mov_b32 s10, -1
; GFX7-NEXT: s_mov_b64 s[6:7], s[10:11]
; GFX7-NEXT: s_nop 1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_nop 0
; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8
; GFX7-NEXT: s_endpgm
@ -1026,8 +1026,8 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
; GFX8-NEXT: s_addc_u32 s1, s5, 0
; GFX8-NEXT: s_and_b32 s2, 1, s2
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX8-NEXT: s_nop 3
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_nop 2
; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1

View File

@ -48,10 +48,10 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i
; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
; GFX8-NEXT: s_endpgm
;

View File

@ -69,8 +69,8 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@ -98,8 +98,8 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@ -130,8 +130,8 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -162,8 +162,8 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -415,7 +415,6 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -467,7 +466,6 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -526,8 +524,8 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -578,8 +576,8 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -654,7 +652,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -706,7 +703,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -765,8 +761,8 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -817,8 +813,8 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -893,7 +889,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -945,7 +940,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_add_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -1004,8 +998,8 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -1056,8 +1050,8 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -1134,8 +1128,8 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v0, 5, v[1:2]
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_nop 1
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@ -1167,8 +1161,8 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v0, 5, v[1:2]
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@ -1201,8 +1195,8 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_mad_u64_u32 v[0:1], s[2:3], v0, 5, s[2:3]
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_nop 2
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -1235,8 +1229,8 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_mad_u64_u32 v[0:1], s2, v0, 5, s[2:3]
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_nop 2
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -1619,7 +1613,6 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -1649,7 +1642,6 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -1682,7 +1674,6 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
@ -1715,7 +1706,6 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: v_sub_nc_u32_e32 v0, s2, v0
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
@ -1968,7 +1958,6 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -2020,7 +2009,6 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -2079,8 +2067,8 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_sub_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -2131,8 +2119,8 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_sub_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -2705,7 +2693,6 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_and_b32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -2756,7 +2743,6 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_and_b32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -2814,8 +2800,8 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_and_b32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -2865,8 +2851,8 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_and_b32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -2941,7 +2927,6 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_or_b32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -2993,7 +2978,6 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_or_b32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -3052,8 +3036,8 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_or_b32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -3104,8 +3088,8 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_or_b32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -3180,7 +3164,6 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_xor_b32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -3232,7 +3215,6 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_xor_b32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -3291,8 +3273,8 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_xor_b32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -3343,8 +3325,8 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_xor_b32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -3418,7 +3400,6 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_max_i32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -3469,7 +3450,6 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_max_i32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -3527,8 +3507,8 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_max_i32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -3578,8 +3558,8 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_max_i32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -3838,7 +3818,6 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_min_i32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -3889,7 +3868,6 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_min_i32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -3947,8 +3925,8 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_min_i32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -3998,8 +3976,8 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_min_i32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -4259,7 +4237,6 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_max_u32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -4311,7 +4288,6 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_max_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -4370,8 +4346,8 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_max_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -4422,8 +4398,8 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_max_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@ -4679,7 +4655,6 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_min_u32_e32 v0, s2, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
@ -4730,7 +4705,6 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_min_u32_e32 v0, s2, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
@ -4788,8 +4762,8 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mov_b32_e32 v0, v1
; GFX1064-NEXT: v_min_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@ -4839,8 +4813,8 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mov_b32_e32 v0, v1
; GFX1032-NEXT: v_min_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:

View File

@ -223,7 +223,6 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: s_not_b64 exec, exec
; GFX8-NEXT: s_or_saveexec_b64 s[10:11], -1
; GFX8-NEXT: s_nop 0
; GFX8-NEXT: v_add_u32_dpp v2, vcc, v2, v2 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX8-NEXT: s_nop 1
; GFX8-NEXT: v_add_u32_dpp v2, vcc, v2, v2 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:0
@ -280,7 +279,6 @@ define amdgpu_ps void @add_i32_varying(<4 x i32> inreg %out, <4 x i32> inreg %in
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_not_b64 exec, exec
; GFX9-NEXT: s_or_saveexec_b64 s[10:11], -1
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: v_add_u32_dpp v2, v2, v2 row_shr:1 row_mask:0xf bank_mask:0xf bound_ctrl:0
; GFX9-NEXT: s_nop 1
; GFX9-NEXT: v_add_u32_dpp v2, v2, v2 row_shr:2 row_mask:0xf bank_mask:0xf bound_ctrl:0

View File

@ -185,8 +185,8 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset,
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: s_nop 4
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_nop 3
; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1
@ -199,9 +199,9 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset,
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_nop 4
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_gl0_inv
@ -225,8 +225,8 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: s_nop 4
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_nop 3
; GFX9-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1
@ -239,9 +239,9 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_nop 4
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_gl0_inv
@ -266,8 +266,8 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: s_nop 4
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_nop 3
; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1
@ -280,9 +280,9 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_nop 4
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@ -306,8 +306,8 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s0, v2
; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: s_nop 4
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_nop 3
; GFX9-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1
@ -320,9 +320,9 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_readfirstlane_b32 s0, v2
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_nop 4
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0

View File

@ -30,9 +30,7 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GFX8-NOOPT: s_nop 1
; GFX8-OPT: s_nop 0
; GFX8-OPT-NEXT: s_nop 0
; GFX8: s_nop 1
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {

View File

@ -12,7 +12,6 @@ define amdgpu_kernel void @vector_clause(<4 x i32> addrspace(1)* noalias nocaptu
; GCN-NEXT: global_load_dwordx4 v[4:7], v16, s[2:3] offset:16
; GCN-NEXT: global_load_dwordx4 v[8:11], v16, s[2:3] offset:32
; GCN-NEXT: global_load_dwordx4 v[12:15], v16, s[2:3] offset:48
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(3)
; GCN-NEXT: global_store_dwordx4 v16, v[0:3], s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(3)
@ -52,7 +51,6 @@ define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocaptu
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx2 s[18:19], s[0:1], 0x2c
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx4 s[0:3], s[16:17], 0x0
; GCN-NEXT: s_load_dwordx4 s[4:7], s[16:17], 0x10
@ -124,7 +122,6 @@ define void @mubuf_clause(<4 x i32> addrspace(5)* noalias nocapture readonly %ar
; GCN-NEXT: buffer_load_dword v17, v0, s[0:3], 0 offen offset:56
; GCN-NEXT: v_add_u32_e32 v1, v1, v2
; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen offset:60
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(15)
@ -191,7 +188,6 @@ define amdgpu_kernel void @vector_clause_indirect(i64 addrspace(1)* noalias noca
; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_load_dwordx2 v[8:9], v0, s[2:3]
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: global_load_dwordx4 v[0:3], v[8:9], off
; GCN-NEXT: global_load_dwordx4 v[4:7], v[8:9], off offset:16
@ -225,7 +221,6 @@ define void @load_global_d16_hi(i16 addrspace(1)* %in, i16 %reg, <2 x i16> addrs
; GCN-NEXT: global_load_short_d16_hi v5, v[0:1], off
; GCN-NEXT: s_nop 0
; GCN-NEXT: global_load_short_d16_hi v2, v[0:1], off offset:64
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: global_store_dword v[3:4], v5, off
; GCN-NEXT: s_waitcnt vmcnt(1)
@ -254,7 +249,6 @@ define void @load_global_d16_lo(i16 addrspace(1)* %in, i32 %reg, <2 x i16> addrs
; GCN-NEXT: global_load_short_d16 v5, v[0:1], off
; GCN-NEXT: s_nop 0
; GCN-NEXT: global_load_short_d16 v2, v[0:1], off offset:64
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: global_store_dword v[3:4], v5, off
; GCN-NEXT: s_waitcnt vmcnt(1)

View File

@ -1,137 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -run-pass=si-shrink-instructions %s -o - | FileCheck %s
---
name: merge_2_nop
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: merge_2_nop
; CHECK: S_NOP 1
S_NOP 0
S_NOP 0
...
---
name: merge_3_nop
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: merge_3_nop
; CHECK: S_NOP 2
S_NOP 0
S_NOP 0
S_NOP 0
...
---
name: merge_7_nop
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: merge_7_nop
; CHECK: S_NOP 6
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
...
---
name: merge_8_nop
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: merge_8_nop
; CHECK: S_NOP 7
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
...
---
name: merge_9_nop
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: merge_9_nop
; CHECK: S_NOP 7
; CHECK: S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
S_NOP 0
...
---
name: no_merge_impdef0
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_merge_impdef0
; CHECK: S_NOP 0, implicit-def $sgpr0
; CHECK: S_NOP 0
S_NOP 0, implicit-def $sgpr0
S_NOP 0
...
---
name: no_merge_impdef1
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_merge_impdef1
; CHECK: S_NOP 0
; CHECK: S_NOP 0, implicit-def $sgpr0
S_NOP 0
S_NOP 0, implicit-def $sgpr0
...
---
name: no_merge_impdef_both
tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: no_merge_impdef_both
; CHECK: S_NOP 0
; CHECK: S_NOP 0, implicit-def $sgpr0
S_NOP 0
S_NOP 0, implicit-def $sgpr0
...

View File

@ -3,7 +3,6 @@
# GCN-LABEL: name: test
# GCN: V_MFMA_F32_32X32X1F32
# GCN: S_BARRIER
# GCN: S_NOP 0
# GCN: V_ACCVGPR_READ_B32
# GCN: BUFFER_STORE_DWORD_OFFEN
---