forked from OSchip/llvm-project
Let targets adjust operand latency of bundles
This reverts the AMDGPU DAG mutation implemented in D72487 and gives a more general way of adjusting BUNDLE operand latency. It also replaces FixBundleLatencyMutation with adjustSchedDependency callback in the AMDGPU, fixing not only successor latencies but predecessors' as well. Differential Revision: https://reviews.llvm.org/D72535
This commit is contained in:
parent
4c00dbf22d
commit
987bf8b6c1
|
@ -270,8 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
|
|||
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
|
||||
RegUse, UseOp));
|
||||
ST.adjustSchedDependency(SU, UseSU, Dep);
|
||||
} else
|
||||
} else {
|
||||
Dep.setLatency(0);
|
||||
// FIXME: We could always let target to adjustSchedDependency(), and
|
||||
// remove this condition, but that currently asserts in Hexagon BE.
|
||||
if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
|
||||
ST.adjustSchedDependency(SU, UseSU, Dep);
|
||||
}
|
||||
|
||||
UseSU->addPred(Dep);
|
||||
}
|
||||
|
|
|
@ -716,6 +716,31 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
|
|||
return MaxNumVGPRs;
|
||||
}
|
||||
|
||||
void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
|
||||
SDep &Dep) const {
|
||||
if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
|
||||
!Src->isInstr() || !Dst->isInstr())
|
||||
return;
|
||||
|
||||
MachineInstr *SrcI = Src->getInstr();
|
||||
MachineInstr *DstI = Dst->getInstr();
|
||||
|
||||
if (SrcI->isBundle()) {
|
||||
const SIRegisterInfo *TRI = getRegisterInfo();
|
||||
auto Reg = Dep.getReg();
|
||||
MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
|
||||
MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
|
||||
for (++I; I != E && I->isBundledWithPred(); ++I) {
|
||||
if (!I->modifiesRegister(Reg, TRI))
|
||||
continue;
|
||||
Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *I));
|
||||
break;
|
||||
}
|
||||
} else if (DstI->isBundle()) {
|
||||
Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI));
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct MemOpClusterMutation : ScheduleDAGMutation {
|
||||
const SIInstrInfo *TII;
|
||||
|
@ -764,45 +789,6 @@ struct MemOpClusterMutation : ScheduleDAGMutation {
|
|||
}
|
||||
};
|
||||
|
||||
struct FixBundleLatencyMutation : ScheduleDAGMutation {
|
||||
const SIInstrInfo *TII;
|
||||
|
||||
const TargetSchedModel *TSchedModel;
|
||||
|
||||
FixBundleLatencyMutation(const SIInstrInfo *tii) : TII(tii) {}
|
||||
|
||||
unsigned computeLatency(const MachineInstr &MI, unsigned Reg) const {
|
||||
const SIRegisterInfo &TRI = TII->getRegisterInfo();
|
||||
MachineBasicBlock::const_instr_iterator I(MI.getIterator());
|
||||
MachineBasicBlock::const_instr_iterator E(MI.getParent()->instr_end());
|
||||
unsigned Lat = 0;
|
||||
for (++I; I != E && I->isBundledWithPred(); ++I) {
|
||||
if (!I->modifiesRegister(Reg, &TRI))
|
||||
continue;
|
||||
Lat = TSchedModel->computeInstrLatency(&*I);
|
||||
break;
|
||||
}
|
||||
return Lat;
|
||||
}
|
||||
|
||||
void apply(ScheduleDAGInstrs *DAGInstrs) override {
|
||||
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
|
||||
TSchedModel = DAGInstrs->getSchedModel();
|
||||
if (!TSchedModel || DAG->SUnits.empty())
|
||||
return;
|
||||
|
||||
for (SUnit &SU : DAG->SUnits) {
|
||||
if (!SU.isInstr() || !SU.getInstr()->isBundle())
|
||||
continue;
|
||||
for (SDep &Dep : SU.Succs) {
|
||||
if (Dep.getKind() == SDep::Kind::Data && Dep.getReg())
|
||||
if (unsigned Lat = computeLatency(*SU.getInstr(), Dep.getReg()))
|
||||
Dep.setLatency(Lat);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct FillMFMAShadowMutation : ScheduleDAGMutation {
|
||||
const SIInstrInfo *TII;
|
||||
|
||||
|
@ -929,7 +915,6 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
|
|||
|
||||
void GCNSubtarget::getPostRAMutations(
|
||||
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
|
||||
Mutations.push_back(std::make_unique<FixBundleLatencyMutation>(&InstrInfo));
|
||||
Mutations.push_back(std::make_unique<MemOpClusterMutation>(&InstrInfo));
|
||||
Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
|
||||
}
|
||||
|
|
|
@ -1212,6 +1212,8 @@ public:
|
|||
unsigned getMinWavesPerEU() const override {
|
||||
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
|
||||
}
|
||||
|
||||
void adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const override;
|
||||
};
|
||||
|
||||
class R600Subtarget final : public R600GenSubtargetInfo,
|
||||
|
|
|
@ -1045,7 +1045,7 @@ public:
|
|||
|
||||
unsigned getInstrLatency(const InstrItineraryData *ItinData,
|
||||
const MachineInstr &MI,
|
||||
unsigned *PredCost) const override;
|
||||
unsigned *PredCost = nullptr) const override;
|
||||
};
|
||||
|
||||
/// \brief Returns true if a reg:subreg pair P has a TRC class
|
||||
|
|
Loading…
Reference in New Issue