[HazardRec] Allow inserting multiple wait-states simultaneously

If a target can encode multiple wait-states into a single noop, allow emitting
such instructions directly.

Reviewed By: rampitec, dmgreen

Differential Revision: https://reviews.llvm.org/D89753
This commit is contained in:
Austin Kerbow 2020-10-19 14:38:02 -07:00
parent 6781fee085
commit 37d907899f
15 changed files with 148 additions and 358 deletions

View File

@ -114,6 +114,14 @@ public:
// Default implementation: count it as a cycle.
AdvanceCycle();
}
/// EmitNoops - This callback is invoked when \p Quantity noops were added to
/// the instruction stream.
virtual void EmitNoops(unsigned Quantity) {
  // Default implementation: report every inserted noop individually by
  // forwarding to EmitNoop() once per noop.
  for (unsigned Remaining = Quantity; Remaining != 0; --Remaining)
    EmitNoop();
}
};
} // end namespace llvm

View File

@ -1343,6 +1343,11 @@ public:
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
/// Insert \p Quantity noops into the instruction stream at the specified
/// point. Targets that can encode several wait-states in a single noop may
/// override this to emit fewer instructions.
virtual void insertNoops(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned Quantity) const;
/// Return the noop instruction to use for a noop.
virtual void getNoop(MCInst &NopInst) const;

View File

@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
for (MachineInstr &MI : MBB) {
// If we need to emit noops prior to this instruction, then do so.
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
for (unsigned i = 0; i != NumPreNoops; ++i) {
HazardRec->EmitNoop();
TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
++NumNoops;
}
HazardRec->EmitNoops(NumPreNoops);
TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
NumNoops += NumPreNoops;
HazardRec->EmitInstruction(&MI);
if (HazardRec->atIssueLimit()) {

View File

@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
llvm_unreachable("Target didn't implement insertNoop!");
}
/// insertNoops - Insert \p Quantity noops into the instruction stream at the
/// specified point. This generic version simply calls insertNoop() once per
/// requested noop.
void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  unsigned Quantity) const {
  for (unsigned Remaining = Quantity; Remaining != 0; --Remaining)
    insertNoop(MBB, MI);
}
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
return strncmp(Str, MAI.getCommentString().data(),
MAI.getCommentString().size()) == 0;

View File

@ -1047,9 +1047,6 @@ void GCNPassConfig::addPreEmitPass() {
//
// Here we add a stand-alone hazard recognizer pass which can handle all
// cases.
//
// FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
// be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
addPass(&BranchRelaxationPassID);
}

View File

@ -1533,25 +1533,24 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addMemOperand(MMO);
}
/// Insert S_NOP instructions at \p MI to cover \p Count wait states. Each
/// S_NOP carries an immediate of at most 7 and accounts for up to eight wait
/// states; a non-positive \p Count inserts nothing.
void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   int Count) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
  for (; Count > 0; Count -= 8) {
    // The immediate encodes "wait states minus one", capped at 7.
    const int Imm = Count >= 8 ? 7 : Count - 1;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Imm);
  }
}
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
insertWaitStates(MBB, MI, 1);
insertNoops(MBB, MI, 1);
}
/// Insert \p Quantity noops at \p MI using as few S_NOP instructions as
/// possible. A single S_NOP with immediate N stands in for N + 1 noops, with
/// N limited to 7, so larger requests are split into several S_NOPs.
void SIInstrInfo::insertNoops(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              unsigned Quantity) const {
  DebugLoc DL = MBB.findDebugLoc(MI);
  while (Quantity != 0) {
    // Number of noops folded into this S_NOP (at most eight).
    const unsigned Covered = Quantity < 8 ? Quantity : 8;
    const unsigned Imm = Covered - 1;
    Quantity -= Covered;
    BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Imm);
  }
}
void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {

View File

@ -898,12 +898,12 @@ public:
/// VALU if necessary. If present, \p MDT is updated.
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
int Count) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned Quantity) const override;
void insertReturn(MachineBasicBlock &MBB) const;
/// Return the number of wait states that result from executing this
/// instruction.

View File

@ -2,9 +2,7 @@
# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_to_s_denorm_mode
@ -16,9 +14,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
@ -30,9 +26,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_to_s_denorm_mode
@ -44,9 +38,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_to_s_denorm_mode
@ -58,9 +50,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_to_s_denorm_mode
@ -72,9 +62,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_to_s_denorm_mode
@ -86,9 +74,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
@ -100,9 +86,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_rtn_to_s_denorm_mode
@ -114,9 +98,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMAX_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
@ -128,9 +110,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_rtn_to_s_denorm_mode
@ -142,9 +122,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FMIN_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
@ -156,9 +134,7 @@ body: |
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
@ -170,9 +146,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_to_s_denorm_mode
@ -184,9 +158,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
@ -198,9 +170,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_to_s_denorm_mode
@ -212,9 +182,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_to_s_denorm_mode
@ -226,9 +194,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_to_s_denorm_mode
@ -240,9 +206,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_to_s_denorm_mode
@ -254,9 +218,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
@ -268,9 +230,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
@ -282,9 +242,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_rtn_to_s_denorm_mode
@ -296,9 +254,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
@ -310,9 +266,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_rtn_to_s_denorm_mode
@ -324,9 +278,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
@ -338,9 +290,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
@ -352,9 +302,7 @@ body: |
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
@ -366,9 +314,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
@ -380,9 +326,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
@ -394,9 +338,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
@ -408,9 +350,7 @@ body: |
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: S_DENORM_MODE
---
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode

View File

@ -612,8 +612,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
@ -740,8 +739,7 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
@ -1842,8 +1840,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
; SI-NEXT: v_bfe_u32 v10, v9, 20, 11
@ -1876,8 +1873,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 0
; SI-NEXT: s_nop 1
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
; SI-NEXT: v_bfe_u32 v8, v7, 20, 11

View File

@ -40,10 +40,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
# GCN: S_LOAD_DWORDX2_IMM
# GCN-NEXT: }
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 3
# GCN: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_hazard_ignore_bundle_instr
@ -63,11 +60,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
# GCN: bb.2:
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_after_bundle

View File

@ -1,5 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,SICI
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,SICI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
@ -24,26 +24,17 @@
# GCN-LABEL: bb.1:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.2:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.3:
# GCN: V_DIV_SCALE_F32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_DIV_FMAS_F32
name: div_fmas
@ -76,14 +67,12 @@ body: |
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 0
# GCN: S_NOP 0
# GCN: S_NOP 1
# GCN: S_GETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG_IMM32
# GCN: S_NOP 0
# GCN: S_NOP 0
# GCN: S_NOP 1
# GCN: S_GETREG
# GCN-LABEL: bb.2:
@ -126,15 +115,15 @@ body: |
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 0
# VI: S_NOP 0
# GCN-NEXT: S_SETREG
# SICI: S_NOP 0
# VI: S_NOP 1
# GCN: S_SETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG
# GCN: S_NOP 0
# VI: S_NOP 0
# GCN-NEXT: S_SETREG
# SICI: S_NOP 0
# VI: S_NOP 1
# GCN: S_SETREG
# GCN-LABEL: bb.2:
# GCN: S_SETREG
@ -239,34 +228,22 @@ body: |
# GCN-LABEL: bb.0:
# GCN: V_ADD_CO_U32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_READLANE_B32
# GCN-LABEL: bb.1:
# GCN: V_ADD_CO_U32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_WRITELANE_B32
# GCN-LABEL: bb.2:
# GCN: V_ADD_CO_U32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_READLANE_B32
# GCN-LABEL: bb.3:
# GCN: V_ADD_CO_U32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP 3
# GCN: V_WRITELANE_B32
name: readwrite_lane
@ -429,17 +406,12 @@ body: |
# VI-LABEL: bb.0:
# VI: V_MOV_B32_e32
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 1
# VI-NEXT: V_MOV_B32_dpp
# VI-LABEL: bb.1:
# VI: V_CMPX_EQ_I32_e32
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 4
# VI-NEXT: V_MOV_B32_dpp
name: dpp

View File

@ -7,9 +7,7 @@
; VI-LABEL: {{^}}dpp_test:
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
; PREGFX10-OPT: s_nop 1
; PREGFX10-NOOPT: s_nop 0
; PREGFX10-NOOPT: s_nop 0
; PREGFX10: s_nop 1
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
@ -21,14 +19,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
; VI-LABEL: {{^}}dpp_wait_states:
; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
; PREGFX10-OPT: s_nop 1
; PREGFX10-NOOPT: s_nop 0
; PREGFX10-NOOPT: s_nop 0
; PREGFX10: s_nop 1
; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:
; PREGFX10-OPT: s_nop 1
; PREGFX10-NOOPT: s_nop 0
; PREGFX10-NOOPT: s_nop 0
; PREGFX10: s_nop 1
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
@ -44,13 +38,10 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
; PREGFX10-OPT: s_mov_b32
; PREGFX10-NOOPT: s_waitcnt
; PREGFX10-NOOPT: v_mov_b32_e32
; PREGFX10-NOOPT-NEXT: s_nop 0
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; PREGFX10-OPT: s_nop 1
; PREGFX10: s_nop 1
; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; PREGFX10-OPT: s_nop 1
; PREGFX10-NOOPT: s_nop 0
; PREGFX10-NOOPT: s_nop 0
; PREGFX10: s_nop 1
; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) {
%cmp = fcmp oeq float %cond, 0.0

View File

@ -5,9 +5,7 @@
; GCN-LABEL: {{^}}dpp_test:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_nop 1
; GFX8-NOOPT: s_nop 0
; GFX8-NOOPT-NEXT: s_nop 0
; GFX8: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
@ -18,9 +16,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2)
; GCN-LABEL: {{^}}dpp_test_bc:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_nop 1
; GFX8-NOOPT: s_nop 0
; GFX8-NOOPT-NEXT: s_nop 0
; GFX8: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
@ -34,8 +30,9 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GFX8: s_nop 0
; GFX8-NEXT: s_nop 0
; GFX8-NOOPT: s_nop 1
; GFX8-OPT: s_nop 0
; GFX8-OPT-NEXT: s_nop 0
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {

View File

@ -3,8 +3,7 @@
# GCN-LABEL: name: valu_write_vgpr_mfma_read
# GCN: V_MOV_B32
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: valu_write_vgpr_mfma_read
body: |
@ -17,8 +16,7 @@ body: |
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
# GCN: V_MOV_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: valu_write_vgpr_accvgpr_write_read
body: |
@ -41,8 +39,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_overlap
body: |
@ -54,8 +51,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_read_partial
body: |
@ -67,10 +63,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_srca_read_overlap
body: |
@ -82,10 +75,7 @@ body: |
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: mfma_write_agpr_mfma_srcb_read_overlap
body: |
@ -97,10 +87,7 @@ body: |
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_4X4X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_4x4_write_agpr_accvgpr_read
body: |
@ -112,16 +99,8 @@ body: |
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_16X16X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_16x16_write_agpr_accvgpr_read
body: |
@ -133,24 +112,9 @@ body: |
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
# GCN: V_MFMA_F32_32X32X2F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_READ_B32
name: mfma_32x32_write_agpr_accvgpr_read
body: |
@ -174,13 +138,7 @@ body: |
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_16X16X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_16x16_write_agpr_accvgpr_write
body: |
@ -192,21 +150,8 @@ body: |
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
# GCN: V_MFMA_F32_32X32X2F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_32x32_write_agpr_accvgpr_write
body: |
@ -229,11 +174,7 @@ body: |
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_16X16X1F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_16x16_read_srcc_accvgpr_write
body: |
@ -245,19 +186,8 @@ body: |
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
# GCN: V_MFMA_F32_32X32X2F32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: mfma_32x32_read_srcc_accvgpr_write
body: |
@ -280,8 +210,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: accvgpr_read_write_vgpr_mfma_read
body: |
@ -293,8 +222,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: accvgpr_read_write_vgpr_accvgpr_write_read
body: |
@ -318,9 +246,7 @@ body: |
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srca
body: |
@ -332,9 +258,7 @@ body: |
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: accvgpr_write_agpr_mfma_read_srcb
body: |
@ -346,9 +270,7 @@ body: |
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
# GCN: V_ACCVGPR_WRITE_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_READ_B32
name: accvgpr_write_agpr_accvgpr_read
body: |
@ -360,10 +282,7 @@ body: |
# GCN-LABEL: name: vcmpx_write_exec_mfma
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: vcmpx_write_exec_mfma
body: |
@ -375,10 +294,7 @@ body: |
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
# GCN: V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_ACCVGPR_WRITE_B32
name: vcmpx_write_exec_accvgpr_write
body: |
@ -390,8 +306,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_LOAD_DWORD
name: accvgpr_read_write_vgpr_load
body: |
@ -403,8 +318,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_ds_permute
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: DS_PERMUTE_B32
name: accvgpr_read_write_vgpr_ds_permute
body: |
@ -416,8 +330,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_flat_load
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_LOAD_DWORD
name: accvgpr_read_write_vgpr_flat_load
body: |
@ -429,8 +342,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_buffer_store
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: BUFFER_STORE_DWORD_OFFSET
name: accvgpr_read_write_vgpr_buffer_store
body: |
@ -442,8 +354,7 @@ body: |
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
# GCN: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: DS_WRITE_B32
name: accvgpr_read_write_vgpr_store
body: |
@ -497,8 +408,7 @@ body: |
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
# GCN: V_MOV_B32
# GCN-NEXT: V_ACCVGPR_READ_B32
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_LOAD_DWORD
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
body: |

View File

@ -2,11 +2,7 @@
# GCN-LABEL: name: vmem_vcc_fallthrough
# GCN: bb.1:
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_fallthrough
@ -23,10 +19,7 @@ body: |
...
# GCN-LABEL: name: vmem_vcc_branch_to_next
# GCN: bb.1:
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_branch_to_next
@ -82,10 +75,7 @@ body: |
...
# GCN-LABEL: name: vmem_vcc_branch_around
# GCN: bb.2:
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_branch_around
@ -110,10 +100,7 @@ body: |
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_branch_backedge
# GCN: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_branch_backedge
@ -132,11 +119,7 @@ body: |
...
# GCN-LABEL: name: vmem_vcc_min_of_two
# GCN: bb.2:
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two
@ -159,10 +142,7 @@ body: |
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: name: vmem_vcc_self_loop
# GCN: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_self_loop
@ -179,10 +159,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1
# GCN: bb.1:
# GCN: $sgpr0 = S_MOV_B32 0
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_self_loop1
@ -205,9 +182,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2
# GCN: bb.1:
# GCN: $sgpr0 = S_MOV_B32 0
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP
# GCN-NEXT: S_NOP 2
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_self_loop2