[AMDGPU] Model distance to instruction in bundle

This change allows to model the height of the instruction
within a bundle for latency adjustment purposes.

Differential Revision: https://reviews.llvm.org/D72669
This commit is contained in:
Stanislav Mekhanoshin 2020-01-13 17:01:36 -08:00
parent 6d8abe424a
commit ad741853c3
2 changed files with 61 additions and 5 deletions

View File

@ -730,14 +730,26 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
auto Reg = Dep.getReg();
MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
unsigned Lat = 0;
for (++I; I != E && I->isBundledWithPred(); ++I) {
if (!I->modifiesRegister(Reg, TRI))
continue;
Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *I));
break;
if (I->modifiesRegister(Reg, TRI))
Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
else if (Lat)
--Lat;
}
Dep.setLatency(Lat);
} else if (DstI->isBundle()) {
Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI));
const SIRegisterInfo *TRI = getRegisterInfo();
auto Reg = Dep.getReg();
MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
if (I->readsRegister(Reg, TRI))
break;
--Lat;
}
Dep.setLatency(Lat);
}
}

View File

@ -0,0 +1,44 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
# Check that we move consumer further from producer, even if one of them is in a bundle.
---
name: src_bundle_latency
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: src_bundle_latency
; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
; GCN: }
; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $exec
; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $exec
$vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
}
$vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
$vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec
...
---
name: dst_bundle_latency
tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: dst_bundle_latency
; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec
; GCN: }
$vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
$vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
}
...