forked from OSchip/llvm-project
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary: Clean up the code around the mem ops clustering logic. This patch cleans up code within the function clusterNeighboringMemOps(). It is a work in progress, and this patch is a first cut. Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar Reviewed By: foad Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80119
This commit is contained in:
parent
6f802ec433
commit
09f7dcb64e
|
@ -1580,18 +1580,37 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
|
|||
return;
|
||||
|
||||
llvm::sort(MemOpRecords);
|
||||
|
||||
// At this point, `MemOpRecords` array must hold at least two mem ops. Try to
|
||||
// cluster mem ops collected within `MemOpRecords` array.
|
||||
unsigned ClusterLength = 1;
|
||||
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
|
||||
SUnit *SUa = MemOpRecords[Idx].SU;
|
||||
SUnit *SUb = MemOpRecords[Idx+1].SU;
|
||||
if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
|
||||
MemOpRecords[Idx + 1].BaseOps,
|
||||
ClusterLength + 1)) {
|
||||
// Decision to cluster mem ops is taken based on target dependent logic
|
||||
auto MemOpa = MemOpRecords[Idx];
|
||||
auto MemOpb = MemOpRecords[Idx + 1];
|
||||
++ClusterLength;
|
||||
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
|
||||
ClusterLength)) {
|
||||
// Current mem ops pair could not be clustered, reset cluster length, and
|
||||
// go to next pair
|
||||
ClusterLength = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
SUnit *SUa = MemOpa.SU;
|
||||
SUnit *SUb = MemOpb.SU;
|
||||
if (SUa->NodeNum > SUb->NodeNum)
|
||||
std::swap(SUa, SUb);
|
||||
if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
|
||||
|
||||
// FIXME: Is this check really required?
|
||||
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
|
||||
ClusterLength = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
|
||||
<< SUb->NodeNum << ")\n");
|
||||
|
||||
// Copy successor edges from SUa to SUb. Interleaving computation
|
||||
// dependent on SUa can prevent load combining due to register reuse.
|
||||
// Predecessor edges do not need to be copied from SUb to SUa since
|
||||
|
@ -1599,15 +1618,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
|
|||
for (const SDep &Succ : SUa->Succs) {
|
||||
if (Succ.getSUnit() == SUb)
|
||||
continue;
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
|
||||
LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
|
||||
<< ")\n");
|
||||
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
|
||||
}
|
||||
++ClusterLength;
|
||||
} else
|
||||
ClusterLength = 1;
|
||||
} else
|
||||
ClusterLength = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue