forked from OSchip/llvm-project
[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic
Summary: Clean up the code around the mem ops clustering logic. This patch cleans up code within the function clusterNeighboringMemOps(). The clean-up is a work in progress, and this patch is a first cut. Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar Reviewed By: foad Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D80119
This commit is contained in:
parent
6f802ec433
commit
09f7dcb64e
|
@ -1580,18 +1580,37 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
|
||||||
return;
|
return;
|
||||||
|
|
||||||
llvm::sort(MemOpRecords);
|
llvm::sort(MemOpRecords);
|
||||||
|
|
||||||
|
// At this point, `MemOpRecords` array must hold at least two mem ops. Try to
|
||||||
|
// cluster mem ops collected within `MemOpRecords` array.
|
||||||
unsigned ClusterLength = 1;
|
unsigned ClusterLength = 1;
|
||||||
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
|
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
|
||||||
SUnit *SUa = MemOpRecords[Idx].SU;
|
// The decision to cluster mem ops is based on target-dependent logic
|
||||||
SUnit *SUb = MemOpRecords[Idx+1].SU;
|
auto MemOpa = MemOpRecords[Idx];
|
||||||
if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
|
auto MemOpb = MemOpRecords[Idx + 1];
|
||||||
MemOpRecords[Idx + 1].BaseOps,
|
++ClusterLength;
|
||||||
ClusterLength + 1)) {
|
if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
|
||||||
|
ClusterLength)) {
|
||||||
|
// Current mem ops pair could not be clustered, reset cluster length, and
|
||||||
|
// go to next pair
|
||||||
|
ClusterLength = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
SUnit *SUa = MemOpa.SU;
|
||||||
|
SUnit *SUb = MemOpb.SU;
|
||||||
if (SUa->NodeNum > SUb->NodeNum)
|
if (SUa->NodeNum > SUb->NodeNum)
|
||||||
std::swap(SUa, SUb);
|
std::swap(SUa, SUb);
|
||||||
if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
|
|
||||||
|
// FIXME: Is this check really required?
|
||||||
|
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
|
||||||
|
ClusterLength = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
|
LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
|
||||||
<< SUb->NodeNum << ")\n");
|
<< SUb->NodeNum << ")\n");
|
||||||
|
|
||||||
// Copy successor edges from SUa to SUb. Interleaving computation
|
// Copy successor edges from SUa to SUb. Interleaving computation
|
||||||
// dependent on SUa can prevent load combining due to register reuse.
|
// dependent on SUa can prevent load combining due to register reuse.
|
||||||
// Predecessor edges do not need to be copied from SUb to SUa since
|
// Predecessor edges do not need to be copied from SUb to SUa since
|
||||||
|
@ -1599,15 +1618,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
|
||||||
for (const SDep &Succ : SUa->Succs) {
|
for (const SDep &Succ : SUa->Succs) {
|
||||||
if (Succ.getSUnit() == SUb)
|
if (Succ.getSUnit() == SUb)
|
||||||
continue;
|
continue;
|
||||||
LLVM_DEBUG(dbgs()
|
LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
|
||||||
<< " Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
|
<< ")\n");
|
||||||
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
|
DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
|
||||||
}
|
}
|
||||||
++ClusterLength;
|
|
||||||
} else
|
|
||||||
ClusterLength = 1;
|
|
||||||
} else
|
|
||||||
ClusterLength = 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue