forked from OSchip/llvm-project
[AMDGPU] Model distance to instruction in bundle
This change allows modeling the height of an instruction within a bundle for latency adjustment purposes. Differential Revision: https://reviews.llvm.org/D72669
This commit is contained in:
parent
6d8abe424a
commit
ad741853c3
|
@ -730,14 +730,26 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
|
|||
auto Reg = Dep.getReg();
|
||||
MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
|
||||
MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
|
||||
unsigned Lat = 0;
|
||||
for (++I; I != E && I->isBundledWithPred(); ++I) {
|
||||
if (!I->modifiesRegister(Reg, TRI))
|
||||
continue;
|
||||
Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *I));
|
||||
break;
|
||||
if (I->modifiesRegister(Reg, TRI))
|
||||
Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
|
||||
else if (Lat)
|
||||
--Lat;
|
||||
}
|
||||
Dep.setLatency(Lat);
|
||||
} else if (DstI->isBundle()) {
|
||||
Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI));
|
||||
const SIRegisterInfo *TRI = getRegisterInfo();
|
||||
auto Reg = Dep.getReg();
|
||||
MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
|
||||
MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
|
||||
unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
|
||||
for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
|
||||
if (I->readsRegister(Reg, TRI))
|
||||
break;
|
||||
--Lat;
|
||||
}
|
||||
Dep.setLatency(Lat);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# Check that the consumer is scheduled further away from the producer, even if one of them is in a bundle.
|
||||
|
||||
---
|
||||
name: src_bundle_latency
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: src_bundle_latency
|
||||
; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $exec
|
||||
; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $exec
|
||||
$vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
||||
}
|
||||
$vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
|
||||
$vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec
|
||||
...
|
||||
|
||||
---
|
||||
name: dst_bundle_latency
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: dst_bundle_latency
|
||||
; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
|
||||
; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
|
||||
; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
|
||||
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
$vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
|
||||
$vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
|
||||
BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
|
||||
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
||||
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
||||
}
|
||||
...
|
Loading…
Reference in New Issue