[AMDGPU/MemOpsCluster] Code clean-up around mem ops clustering logic

Summary:
This patch cleans up the code around the mem ops clustering logic, specifically within
the function clusterNeighboringMemOps(). The work is still in progress; this patch is a first cut.
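
The essence of the cleanup is replacing the nested success-path ifs with early
"reset and continue" guards, so the clustering loop reads straight down. Below is a
minimal, self-contained sketch of that pattern; Record, shouldCluster() and
tryAddClusterEdge() are illustrative stand-ins, not the MachineScheduler API.

// Standalone sketch of the "reset cluster length and continue" restructuring.
#include <cstdio>
#include <vector>

struct Record {
  int NodeNum;
  long Offset;
};

// Stand-in for the target hook: cluster records whose offsets are adjacent,
// up to a small maximum cluster length.
static bool shouldCluster(const Record &A, const Record &B, unsigned Length) {
  return Length <= 4 && B.Offset - A.Offset == 1;
}

// Stand-in for DAG->addEdge(); always succeeds in this toy model.
static bool tryAddClusterEdge(const Record &A, const Record &B) {
  std::printf("cluster %d -> %d\n", A.NodeNum, B.NodeNum);
  return true;
}

static void clusterNeighboringRecords(const std::vector<Record> &Records) {
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = Records.size(); Idx + 1 < End; ++Idx) {
    const Record &A = Records[Idx];
    const Record &B = Records[Idx + 1];
    // Ask up front whether the pair may join the current cluster; on failure
    // reset the length and move on, instead of nesting the success path.
    ++ClusterLength;
    if (!shouldCluster(A, B, ClusterLength)) {
      ClusterLength = 1;
      continue;
    }
    if (!tryAddClusterEdge(A, B)) {
      ClusterLength = 1;
      continue;
    }
    // Success path continues here without extra indentation.
  }
}

int main() {
  clusterNeighboringRecords({{0, 10}, {1, 11}, {2, 12}, {3, 20}, {4, 21}});
  return 0;
}

The bookkeeping mirrors the patched loop: the prospective cluster length is what reaches
the hook (the old code passed ClusterLength + 1), and the length is reset to 1 whenever the
hook rejects the pair or the cluster edge cannot be added, matching the old else branches.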

Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar

Reviewed By: foad

Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80119
hsmahesha 2020-05-26 15:47:03 +05:30
parent 6f802ec433
commit 09f7dcb64e
1 changed file with 39 additions and 25 deletions

@@ -1580,18 +1580,37 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     return;

   llvm::sort(MemOpRecords);
+
+  // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
+  // cluster mem ops collected within `MemOpRecords` array.
   unsigned ClusterLength = 1;
   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
-    SUnit *SUa = MemOpRecords[Idx].SU;
-    SUnit *SUb = MemOpRecords[Idx+1].SU;
-    if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
-                                 MemOpRecords[Idx + 1].BaseOps,
-                                 ClusterLength + 1)) {
-      if (SUa->NodeNum > SUb->NodeNum)
-        std::swap(SUa, SUb);
-      if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-        LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
-                          << SUb->NodeNum << ")\n");
-        // Copy successor edges from SUa to SUb. Interleaving computation
-        // dependent on SUa can prevent load combining due to register reuse.
-        // Predecessor edges do not need to be copied from SUb to SUa since
+    // Decision to cluster mem ops is taken based on target dependent logic
+    auto MemOpa = MemOpRecords[Idx];
+    auto MemOpb = MemOpRecords[Idx + 1];
+    ++ClusterLength;
+    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
+                                  ClusterLength)) {
+      // Current mem ops pair could not be clustered, reset cluster length, and
+      // go to next pair
+      ClusterLength = 1;
+      continue;
+    }
+
+    SUnit *SUa = MemOpa.SU;
+    SUnit *SUb = MemOpb.SU;
+    if (SUa->NodeNum > SUb->NodeNum)
+      std::swap(SUa, SUb);
+
+    // FIXME: Is this check really required?
+    if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+      ClusterLength = 1;
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
+                      << SUb->NodeNum << ")\n");
+
+    // Copy successor edges from SUa to SUb. Interleaving computation
+    // dependent on SUa can prevent load combining due to register reuse.
+    // Predecessor edges do not need to be copied from SUb to SUa since
@@ -1599,15 +1618,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
-        for (const SDep &Succ : SUa->Succs) {
-          if (Succ.getSUnit() == SUb)
-            continue;
-          LLVM_DEBUG(dbgs()
-                     << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum << ")\n");
-          DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
-        }
-        ++ClusterLength;
-      } else
-        ClusterLength = 1;
-    } else
-      ClusterLength = 1;
+    for (const SDep &Succ : SUa->Succs) {
+      if (Succ.getSUnit() == SUb)
+        continue;
+      LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
+                        << ")\n");
+      DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
+    }
   }
 }