forked from OSchip/llvm-project
[HazardRec] Allow inserting multiple wait-states simultaneously
If a target can encode multiple wait-states into a single noop, allow emitting such instructions directly. Reviewed By: rampitec, dmgreen Differential Revision: https://reviews.llvm.org/D89753
This commit is contained in:
parent
6781fee085
commit
37d907899f
|
@ -114,6 +114,14 @@ public:
|
|||
// Default implementation: count it as a cycle.
|
||||
AdvanceCycle();
|
||||
}
|
||||
|
||||
/// EmitNoops - This callback is invoked when \p Quantity noops were added to
|
||||
/// the instruction stream.
|
||||
virtual void EmitNoops(unsigned Quantity) {
|
||||
// Default implementation: forward to EmitNoop() once per added noop.
|
||||
for (unsigned i = 0; i < Quantity; ++i)
|
||||
EmitNoop();
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
|
|
@ -1343,6 +1343,11 @@ public:
|
|||
virtual void insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const;
|
||||
|
||||
/// Insert the equivalent of \p Quantity noops into \p MBB at \p MI.
/// Virtual so that a target can override it (SIInstrInfo does, folding
/// several noops into a single S_NOP).
|
||||
virtual void insertNoops(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned Quantity) const;
|
||||
|
||||
/// Return the noop instruction to use for a noop.
|
||||
virtual void getNoop(MCInst &NopInst) const;
|
||||
|
||||
|
|
|
@ -82,11 +82,9 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
|
|||
for (MachineInstr &MI : MBB) {
|
||||
// If we need to emit noops prior to this instruction, then do so.
|
||||
unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI);
|
||||
for (unsigned i = 0; i != NumPreNoops; ++i) {
|
||||
HazardRec->EmitNoop();
|
||||
TII->insertNoop(MBB, MachineBasicBlock::iterator(MI));
|
||||
++NumNoops;
|
||||
}
|
||||
HazardRec->EmitNoops(NumPreNoops);
|
||||
TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops);
|
||||
NumNoops += NumPreNoops;
|
||||
|
||||
HazardRec->EmitInstruction(&MI);
|
||||
if (HazardRec->atIssueLimit()) {
|
||||
|
|
|
@ -69,6 +69,15 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
|||
llvm_unreachable("Target didn't implement insertNoop!");
|
||||
}
|
||||
|
||||
/// insertNoops - Insert \p Quantity noops into the instruction stream at the
|
||||
/// specified point. Default implementation: one insertNoop() call per noop.
|
||||
void TargetInstrInfo::insertNoops(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned Quantity) const {
|
||||
for (unsigned i = 0; i < Quantity; ++i)
|
||||
insertNoop(MBB, MI);
|
||||
}
|
||||
|
||||
static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
|
||||
return strncmp(Str, MAI.getCommentString().data(),
|
||||
MAI.getCommentString().size()) == 0;
|
||||
|
|
|
@ -1047,9 +1047,6 @@ void GCNPassConfig::addPreEmitPass() {
|
|||
//
|
||||
// Here we add a stand-alone hazard recognizer pass which can handle all
|
||||
// cases.
|
||||
//
|
||||
// FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
|
||||
// be better for it to emit S_NOP <N> when possible.
|
||||
addPass(&PostRAHazardRecognizerID);
|
||||
addPass(&BranchRelaxationPassID);
|
||||
}
|
||||
|
|
|
@ -1533,25 +1533,24 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||
.addMemOperand(MMO);
|
||||
}
|
||||
|
||||
// Emit S_NOP instructions into MBB at MI to cover Count wait states.
// Each S_NOP gets an immediate of (wait states covered - 1), capped at 7, so
// one instruction accounts for at most 8 wait states. A Count <= 0 emits
// nothing.
void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
int Count) const {
|
||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||
while (Count > 0) {
|
||||
int Arg;
|
||||
if (Count >= 8)
|
||||
Arg = 7;
|
||||
else
|
||||
Arg = Count - 1;
|
||||
// Count is signed: subtracting a full 8 after a final, partial S_NOP just
// drives it to <= 0, which ends the loop.
Count -= 8;
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP))
|
||||
.addImm(Arg);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert a single noop into MBB at MI.
// NOTE(review): this text comes from a rendered diff hunk. The
// insertWaitStates() call and the insertNoops() call below look like the
// pre-change and post-change versions of the same line rather than two
// consecutive statements; confirm against the real source file.
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const {
|
||||
insertWaitStates(MBB, MI, 1);
|
||||
insertNoops(MBB, MI, 1);
|
||||
}
|
||||
|
||||
// Emit S_NOP instructions into MBB at MI that together cover Quantity noops.
// Each S_NOP carries an immediate of (noops covered - 1), capped at 7, so up
// to 8 noops are folded into one instruction; e.g. Quantity == 10 yields
// "S_NOP 7" followed by "S_NOP 1". Quantity == 0 emits nothing.
void SIInstrInfo::insertNoops(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned Quantity) const {
|
||||
DebugLoc DL = MBB.findDebugLoc(MI);
|
||||
while (Quantity > 0) {
|
||||
unsigned Arg;
|
||||
if (Quantity >= 8)
|
||||
Arg = 7;
|
||||
else
|
||||
Arg = Quantity - 1;
|
||||
// Subtract exactly what this S_NOP covers; Quantity is unsigned, so it must
// never be decremented past zero.
Quantity -= Arg + 1;
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)).addImm(Arg);
|
||||
}
|
||||
}
|
||||
|
||||
void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
|
||||
|
|
|
@ -898,12 +898,12 @@ public:
|
|||
/// VALU if necessary. If present, \p MDT is updated.
|
||||
void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
|
||||
|
||||
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
|
||||
int Count) const;
|
||||
|
||||
void insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const override;
|
||||
|
||||
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
|
||||
unsigned Quantity) const override;
|
||||
|
||||
void insertReturn(MachineBasicBlock &MBB) const;
|
||||
/// Return the number of wait states that result from executing this
|
||||
/// instruction.
|
||||
|
|
|
@ -2,9 +2,7 @@
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fcmpswap_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FCMPSWAP
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fcmpswap_to_s_denorm_mode
|
||||
|
@ -16,9 +14,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||
|
@ -30,9 +26,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmax_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMAX
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmax_to_s_denorm_mode
|
||||
|
@ -44,9 +38,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmax_x2_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMAX_X2
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmax_x2_to_s_denorm_mode
|
||||
|
@ -58,9 +50,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmin_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMIN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmin_to_s_denorm_mode
|
||||
|
@ -72,9 +62,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmin_x2_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMIN_X2
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmin_x2_to_s_denorm_mode
|
||||
|
@ -86,9 +74,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FCMPSWAP_X2_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||
|
@ -100,9 +86,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmax_rtn_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMAX_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmax_rtn_to_s_denorm_mode
|
||||
|
@ -114,9 +98,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMAX_X2_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||
|
@ -128,9 +110,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmin_rtn_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMIN_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmin_rtn_to_s_denorm_mode
|
||||
|
@ -142,9 +122,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FMIN_X2_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||
|
@ -156,9 +134,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||
# GCN: FLAT_ATOMIC_FCMPSWAP_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: flat_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||
|
@ -170,9 +146,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fcmpswap_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fcmpswap_to_s_denorm_mode
|
||||
|
@ -184,9 +158,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fcmpswap_x2_to_s_denorm_mode
|
||||
|
@ -198,9 +170,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmax_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMAX
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmax_to_s_denorm_mode
|
||||
|
@ -212,9 +182,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmax_x2_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMAX_X2
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmax_x2_to_s_denorm_mode
|
||||
|
@ -226,9 +194,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmin_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMIN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmin_to_s_denorm_mode
|
||||
|
@ -240,9 +206,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmin_x2_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMIN_X2
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmin_x2_to_s_denorm_mode
|
||||
|
@ -254,9 +218,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fcmpswap_rtn_to_s_denorm_mode
|
||||
|
@ -268,9 +230,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fcmpswap_x2_rtn_to_s_denorm_mode
|
||||
|
@ -282,9 +242,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmax_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMAX_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmax_rtn_to_s_denorm_mode
|
||||
|
@ -296,9 +254,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMAX_X2_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmax_x2_rtn_to_s_denorm_mode
|
||||
|
@ -310,9 +266,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmin_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMIN_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmin_rtn_to_s_denorm_mode
|
||||
|
@ -324,9 +278,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMIN_X2_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmin_x2_rtn_to_s_denorm_mode
|
||||
|
@ -338,9 +290,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_SADDR
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fcmpswap_saddr_to_s_denorm_mode
|
||||
|
@ -352,9 +302,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FCMPSWAP_X2_SADDR_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fcmpswap_x2_saddr_rtn_to_s_denorm_mode
|
||||
|
@ -366,9 +314,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMAX_SADDR_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmax_saddr_rtn_to_s_denorm_mode
|
||||
|
@ -380,9 +326,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMAX_X2_SADDR_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmax_x2_saddr_rtn_to_s_denorm_mode
|
||||
|
@ -394,9 +338,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMIN_SADDR_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmin_saddr_rtn_to_s_denorm_mode
|
||||
|
@ -408,9 +350,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
||||
# GCN: GLOBAL_ATOMIC_FMIN_X2_SADDR_RTN
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: S_DENORM_MODE
|
||||
---
|
||||
name: global_atomic_fmin_x2_saddr_rtn_to_s_denorm_mode
|
||||
|
|
|
@ -612,8 +612,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
|
|||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
|
||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
||||
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
|
||||
|
@ -740,8 +739,7 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs
|
|||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
|
||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v9
|
||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[4:5], v[12:13], v[6:7], v[10:11]
|
||||
; SI-NEXT: v_div_fixup_f64 v[4:5], v[4:5], v[2:3], v[0:1]
|
||||
; SI-NEXT: v_bfe_u32 v6, v5, 20, 11
|
||||
|
@ -1842,8 +1840,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
|
|||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v7, v9
|
||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v3, v13
|
||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[8:9], v[16:17], v[10:11], v[14:15]
|
||||
; SI-NEXT: v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
|
||||
; SI-NEXT: v_bfe_u32 v10, v9, 20, 11
|
||||
|
@ -1876,8 +1873,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
|
|||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7
|
||||
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], v1, v11
|
||||
; SI-NEXT: s_xor_b64 vcc, s[0:1], vcc
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 0
|
||||
; SI-NEXT: s_nop 1
|
||||
; SI-NEXT: v_div_fmas_f64 v[6:7], v[14:15], v[8:9], v[12:13]
|
||||
; SI-NEXT: v_div_fixup_f64 v[6:7], v[6:7], v[4:5], v[0:1]
|
||||
; SI-NEXT: v_bfe_u32 v8, v7, 20, 11
|
||||
|
|
|
@ -40,10 +40,7 @@ body: |
|
|||
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
|
||||
# GCN: S_LOAD_DWORDX2_IMM
|
||||
# GCN-NEXT: }
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_hazard_ignore_bundle_instr
|
||||
|
@ -63,11 +60,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 4
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_min_of_two_after_bundle
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,SICI
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,SICI
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
|
||||
|
||||
|
@ -24,26 +24,17 @@
|
|||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: V_CMP_EQ_I32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_DIV_FMAS_F32
|
||||
|
||||
# GCN-LABEL: bb.2:
|
||||
# GCN: V_CMP_EQ_I32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_DIV_FMAS_F32
|
||||
|
||||
# GCN-LABEL: bb.3:
|
||||
# GCN: V_DIV_SCALE_F32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_DIV_FMAS_F32
|
||||
name: div_fmas
|
||||
|
||||
|
@ -76,14 +67,12 @@ body: |
|
|||
|
||||
# GCN-LABEL: bb.0:
|
||||
# GCN: S_SETREG
|
||||
# GCN: S_NOP 0
|
||||
# GCN: S_NOP 0
|
||||
# GCN: S_NOP 1
|
||||
# GCN: S_GETREG
|
||||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: S_SETREG_IMM32
|
||||
# GCN: S_NOP 0
|
||||
# GCN: S_NOP 0
|
||||
# GCN: S_NOP 1
|
||||
# GCN: S_GETREG
|
||||
|
||||
# GCN-LABEL: bb.2:
|
||||
|
@ -126,15 +115,15 @@ body: |
|
|||
|
||||
# GCN-LABEL: bb.0:
|
||||
# GCN: S_SETREG
|
||||
# GCN: S_NOP 0
|
||||
# VI: S_NOP 0
|
||||
# GCN-NEXT: S_SETREG
|
||||
# SICI: S_NOP 0
|
||||
# VI: S_NOP 1
|
||||
# GCN: S_SETREG
|
||||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: S_SETREG
|
||||
# GCN: S_NOP 0
|
||||
# VI: S_NOP 0
|
||||
# GCN-NEXT: S_SETREG
|
||||
# SICI: S_NOP 0
|
||||
# VI: S_NOP 1
|
||||
# GCN: S_SETREG
|
||||
|
||||
# GCN-LABEL: bb.2:
|
||||
# GCN: S_SETREG
|
||||
|
@ -239,34 +228,22 @@ body: |
|
|||
|
||||
# GCN-LABEL: bb.0:
|
||||
# GCN: V_ADD_CO_U32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_READLANE_B32
|
||||
|
||||
# GCN-LABEL: bb.1:
|
||||
# GCN: V_ADD_CO_U32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_WRITELANE_B32
|
||||
|
||||
# GCN-LABEL: bb.2:
|
||||
# GCN: V_ADD_CO_U32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_READLANE_B32
|
||||
|
||||
# GCN-LABEL: bb.3:
|
||||
# GCN: V_ADD_CO_U32
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN: V_WRITELANE_B32
|
||||
|
||||
name: readwrite_lane
|
||||
|
@ -429,17 +406,12 @@ body: |
|
|||
|
||||
# VI-LABEL: bb.0:
|
||||
# VI: V_MOV_B32_e32
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 1
|
||||
# VI-NEXT: V_MOV_B32_dpp
|
||||
|
||||
# VI-LABEL: bb.1:
|
||||
# VI: V_CMPX_EQ_I32_e32
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 0
|
||||
# VI-NEXT: S_NOP 4
|
||||
# VI-NEXT: V_MOV_B32_dpp
|
||||
|
||||
name: dpp
|
||||
|
|
|
@ -7,9 +7,7 @@
|
|||
; VI-LABEL: {{^}}dpp_test:
|
||||
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
|
||||
; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
|
||||
; PREGFX10-OPT: s_nop 1
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10: s_nop 1
|
||||
; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
|
||||
; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
|
||||
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
|
||||
|
@ -21,14 +19,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
|
|||
; VI-LABEL: {{^}}dpp_wait_states:
|
||||
; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
|
||||
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
|
||||
; PREGFX10-OPT: s_nop 1
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10: s_nop 1
|
||||
; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||
; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:
|
||||
; PREGFX10-OPT: s_nop 1
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10: s_nop 1
|
||||
; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||
; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
|
||||
|
@ -44,13 +38,10 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
|
|||
; PREGFX10-OPT: s_mov_b32
|
||||
; PREGFX10-NOOPT: s_waitcnt
|
||||
; PREGFX10-NOOPT: v_mov_b32_e32
|
||||
; PREGFX10-NOOPT-NEXT: s_nop 0
|
||||
; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||
; PREGFX10-OPT: s_nop 1
|
||||
; PREGFX10: s_nop 1
|
||||
; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||
; PREGFX10-OPT: s_nop 1
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10-NOOPT: s_nop 0
|
||||
; PREGFX10: s_nop 1
|
||||
; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
|
||||
define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) {
|
||||
%cmp = fcmp oeq float %cond, 0.0
|
||||
|
|
|
@ -5,9 +5,7 @@
|
|||
; GCN-LABEL: {{^}}dpp_test:
|
||||
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
|
||||
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX8-OPT: s_nop 1
|
||||
; GFX8-NOOPT: s_nop 0
|
||||
; GFX8-NOOPT-NEXT: s_nop 0
|
||||
; GFX8: s_nop 1
|
||||
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
|
||||
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
|
||||
|
@ -18,9 +16,7 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2)
|
|||
; GCN-LABEL: {{^}}dpp_test_bc:
|
||||
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
|
||||
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX8-OPT: s_nop 1
|
||||
; GFX8-NOOPT: s_nop 0
|
||||
; GFX8-NOOPT-NEXT: s_nop 0
|
||||
; GFX8: s_nop 1
|
||||
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
|
||||
define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
|
||||
|
@ -34,8 +30,9 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
|
|||
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
|
||||
; GFX8: s_nop 0
|
||||
; GFX8-NEXT: s_nop 0
|
||||
; GFX8-NOOPT: s_nop 1
|
||||
; GFX8-OPT: s_nop 0
|
||||
; GFX8-OPT-NEXT: s_nop 0
|
||||
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
|
||||
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
|
||||
define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {
|
||||
|
|
|
@ -3,8 +3,7 @@
|
|||
# GCN-LABEL: name: valu_write_vgpr_mfma_read
|
||||
# GCN: V_MOV_B32
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: valu_write_vgpr_mfma_read
|
||||
body: |
|
||||
|
@ -17,8 +16,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_write_read
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: valu_write_vgpr_accvgpr_write_read
|
||||
body: |
|
||||
|
@ -41,8 +39,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_overlap
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_read_overlap
|
||||
body: |
|
||||
|
@ -54,8 +51,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_read_partial
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_read_partial
|
||||
body: |
|
||||
|
@ -67,10 +63,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_srca_read_overlap
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_srca_read_overlap
|
||||
body: |
|
||||
|
@ -82,10 +75,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_write_agpr_mfma_srcb_read_overlap
|
||||
# GCN: V_MFMA
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: mfma_write_agpr_mfma_srcb_read_overlap
|
||||
body: |
|
||||
|
@ -97,10 +87,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_4x4_write_agpr_accvgpr_read
|
||||
# GCN: V_MFMA_F32_4X4X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: mfma_4x4_write_agpr_accvgpr_read
|
||||
body: |
|
||||
|
@ -112,16 +99,8 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_read
|
||||
# GCN: V_MFMA_F32_16X16X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 7
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: mfma_16x16_write_agpr_accvgpr_read
|
||||
body: |
|
||||
|
@ -133,24 +112,9 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_read
|
||||
# GCN: V_MFMA_F32_32X32X2F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 7
|
||||
# GCN-NEXT: S_NOP 7
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: mfma_32x32_write_agpr_accvgpr_read
|
||||
body: |
|
||||
|
@ -174,13 +138,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_16x16_write_agpr_accvgpr_write
|
||||
# GCN: V_MFMA_F32_16X16X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 6
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_16x16_write_agpr_accvgpr_write
|
||||
body: |
|
||||
|
@ -192,21 +150,8 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_32x32_write_agpr_accvgpr_write
|
||||
# GCN: V_MFMA_F32_32X32X2F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 7
|
||||
# GCN-NEXT: S_NOP 6
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_32x32_write_agpr_accvgpr_write
|
||||
body: |
|
||||
|
@ -229,11 +174,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_16x16_read_srcc_accvgpr_write
|
||||
# GCN: V_MFMA_F32_16X16X1F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 4
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_16x16_read_srcc_accvgpr_write
|
||||
body: |
|
||||
|
@ -245,19 +186,8 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: mfma_32x32_read_srcc_accvgpr_write
|
||||
# GCN: V_MFMA_F32_32X32X2F32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 7
|
||||
# GCN-NEXT: S_NOP 4
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: mfma_32x32_read_srcc_accvgpr_write
|
||||
body: |
|
||||
|
@ -280,8 +210,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_mfma_read
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_read_write_vgpr_mfma_read
|
||||
body: |
|
||||
|
@ -293,8 +222,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_accvgpr_write_read
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: accvgpr_read_write_vgpr_accvgpr_write_read
|
||||
body: |
|
||||
|
@ -318,9 +246,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srca
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_write_agpr_mfma_read_srca
|
||||
body: |
|
||||
|
@ -332,9 +258,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_mfma_read_srcb
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: accvgpr_write_agpr_mfma_read_srcb
|
||||
body: |
|
||||
|
@ -346,9 +270,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_write_agpr_accvgpr_read
|
||||
# GCN: V_ACCVGPR_WRITE_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
name: accvgpr_write_agpr_accvgpr_read
|
||||
body: |
|
||||
|
@ -360,10 +282,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: vcmpx_write_exec_mfma
|
||||
# GCN: V_CMPX_EQ_I32_e32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: V_MFMA
|
||||
name: vcmpx_write_exec_mfma
|
||||
body: |
|
||||
|
@ -375,10 +294,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: vcmpx_write_exec_accvgpr_write
|
||||
# GCN: V_CMPX_EQ_I32_e32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: V_ACCVGPR_WRITE_B32
|
||||
name: vcmpx_write_exec_accvgpr_write
|
||||
body: |
|
||||
|
@ -390,8 +306,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_load
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: accvgpr_read_write_vgpr_load
|
||||
body: |
|
||||
|
@ -403,8 +318,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_ds_permute
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: DS_PERMUTE_B32
|
||||
name: accvgpr_read_write_vgpr_ds_permute
|
||||
body: |
|
||||
|
@ -416,8 +330,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_flat_load
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: accvgpr_read_write_vgpr_flat_load
|
||||
body: |
|
||||
|
@ -429,8 +342,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_buffer_store
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: BUFFER_STORE_DWORD_OFFSET
|
||||
name: accvgpr_read_write_vgpr_buffer_store
|
||||
body: |
|
||||
|
@ -442,8 +354,7 @@ body: |
|
|||
|
||||
# GCN-LABEL: name: accvgpr_read_write_vgpr_store
|
||||
# GCN: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: DS_WRITE_B32
|
||||
name: accvgpr_read_write_vgpr_store
|
||||
body: |
|
||||
|
@ -497,8 +408,7 @@ body: |
|
|||
# GCN-LABEL: name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: V_ACCVGPR_READ_B32
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 0
|
||||
# GCN-NEXT: S_NOP 1
|
||||
# GCN-NEXT: FLAT_LOAD_DWORD
|
||||
name: valu_write_vgpr_accvgpr_read_load_2_and_3_depend
|
||||
body: |
|
||||
|
|
|
@ -2,11 +2,7 @@
|
|||
|
||||
# GCN-LABEL: name: vmem_vcc_fallthrough
|
||||
# GCN: bb.1:
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 4
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_fallthrough
|
||||
|
@ -23,10 +19,7 @@ body: |
|
|||
...
|
||||
# GCN-LABEL: name: vmem_vcc_branch_to_next
|
||||
# GCN: bb.1:
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_branch_to_next
|
||||
|
@ -82,10 +75,7 @@ body: |
|
|||
...
|
||||
# GCN-LABEL: name: vmem_vcc_branch_around
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_branch_around
|
||||
|
@ -110,10 +100,7 @@ body: |
|
|||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_branch_backedge
|
||||
# GCN: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_branch_backedge
|
||||
|
@ -132,11 +119,7 @@ body: |
|
|||
...
|
||||
# GCN-LABEL: name: vmem_vcc_min_of_two
|
||||
# GCN: bb.2:
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 4
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_min_of_two
|
||||
|
@ -159,10 +142,7 @@ body: |
|
|||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_self_loop
|
||||
# GCN: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN: S_NOP 3
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_self_loop
|
||||
|
@ -179,10 +159,7 @@ body: |
|
|||
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop1
|
||||
# GCN: bb.1:
|
||||
# GCN: $sgpr0 = S_MOV_B32 0
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 3
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_min_of_two_self_loop1
|
||||
|
@ -205,9 +182,7 @@ body: |
|
|||
# GCN-LABEL: name: vmem_vcc_min_of_two_self_loop2
|
||||
# GCN: bb.1:
|
||||
# GCN: $sgpr0 = S_MOV_B32 0
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP
|
||||
# GCN-NEXT: S_NOP 2
|
||||
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
|
||||
---
|
||||
name: vmem_vcc_min_of_two_self_loop2
|
||||
|
|
Loading…
Reference in New Issue