forked from OSchip/llvm-project
[llvm-exegesis] InstructionBenchmarkClustering::rangeQuery(): write into llvm::SmallVectorImpl& output parameter
Summary: I do believe this is the correct fix. We call `rangeQuery()` *very* often. And many times its output vector is large (tens of thousands of entries), so small-size-opt won't help. Old: (D54389) ``` Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=100000 -benchmarks-file=/tmp/benchmarks.yaml -analysis-inconsistencies-output-file=/tmp/clusters.html' (10 runs): 7934.528363 task-clock (msec) # 1.000 CPUs utilized ( +- 0.19% ) ... 7.9354 +- 0.0148 seconds time elapsed ( +- 0.19% ) ``` New: ``` Performance counter stats for './bin/llvm-exegesis -mode=analysis -analysis-epsilon=100000 -benchmarks-file=/tmp/benchmarks.yaml -analysis-inconsistencies-output-file=/tmp/clusters.html' (10 runs): 7383.793440 task-clock (msec) # 1.000 CPUs utilized ( +- 0.47% ) ... 7.3868 +- 0.0340 seconds time elapsed ( +- 0.46% ) ``` And another -7%. And that isn't even the good bit yet. Old: * calls to allocation functions: 2081419 * temporary allocations: 219658 (10.55%) * bytes allocated in total (ignoring deallocations): 4.31 GB New: * calls to allocation functions: 1880295 (-10%) * temporary allocations: 18758 (1%) (-91% *sic*) * bytes allocated in total (ignoring deallocations): 545.15 MB (-88% *sic*) Reviewers: courbet, MaskRay, RKSimon, gchatelet, john.brawn Reviewed By: courbet, MaskRay Subscribers: tschuett, llvm-commits Differential Revision: https://reviews.llvm.org/D54390 llvm-svn: 347202
This commit is contained in:
parent
5c5b1ea725
commit
666d855fbb
|
@ -33,9 +33,9 @@ namespace exegesis {
|
||||||
|
|
||||||
// Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
|
// Finds the points at distance less than sqrt(EpsilonSquared) of Q (not
|
||||||
// including Q).
|
// including Q).
|
||||||
llvm::SmallVector<size_t, 0>
|
void InstructionBenchmarkClustering::rangeQuery(
|
||||||
InstructionBenchmarkClustering::rangeQuery(const size_t Q) const {
|
const size_t Q, llvm::SmallVectorImpl<size_t> &Neighbors) const {
|
||||||
llvm::SmallVector<size_t, 0> Neighbors;
|
Neighbors.clear();
|
||||||
const auto &QMeasurements = Points_[Q].Measurements;
|
const auto &QMeasurements = Points_[Q].Measurements;
|
||||||
for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
|
for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
|
||||||
if (P == Q)
|
if (P == Q)
|
||||||
|
@ -47,7 +47,6 @@ InstructionBenchmarkClustering::rangeQuery(const size_t Q) const {
|
||||||
Neighbors.push_back(P);
|
Neighbors.push_back(P);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Neighbors;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool InstructionBenchmarkClustering::isNeighbour(
|
bool InstructionBenchmarkClustering::isNeighbour(
|
||||||
|
@ -103,10 +102,11 @@ llvm::Error InstructionBenchmarkClustering::validateAndSetup() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
|
void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
|
||||||
|
llvm::SmallVector<size_t, 0> Neighbors; // Persistent buffer to avoid allocs.
|
||||||
for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
|
for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
|
||||||
if (!ClusterIdForPoint_[P].isUndef())
|
if (!ClusterIdForPoint_[P].isUndef())
|
||||||
continue; // Previously processed in inner loop.
|
continue; // Previously processed in inner loop.
|
||||||
const auto Neighbors = rangeQuery(P);
|
rangeQuery(P, Neighbors);
|
||||||
if (Neighbors.size() + 1 < MinPts) { // Density check.
|
if (Neighbors.size() + 1 < MinPts) { // Density check.
|
||||||
// The region around P is not dense enough to create a new cluster, mark
|
// The region around P is not dense enough to create a new cluster, mark
|
||||||
// as noise for now.
|
// as noise for now.
|
||||||
|
@ -141,7 +141,7 @@ void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
|
||||||
ClusterIdForPoint_[Q] = CurrentCluster.Id;
|
ClusterIdForPoint_[Q] = CurrentCluster.Id;
|
||||||
CurrentCluster.PointIndices.push_back(Q);
|
CurrentCluster.PointIndices.push_back(Q);
|
||||||
// And extend to the neighbors of Q if the region is dense enough.
|
// And extend to the neighbors of Q if the region is dense enough.
|
||||||
const auto Neighbors = rangeQuery(Q);
|
rangeQuery(Q, Neighbors);
|
||||||
if (Neighbors.size() + 1 >= MinPts) {
|
if (Neighbors.size() + 1 >= MinPts) {
|
||||||
ToProcess.insert(Neighbors.begin(), Neighbors.end());
|
ToProcess.insert(Neighbors.begin(), Neighbors.end());
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,7 +97,7 @@ private:
|
||||||
const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
|
const std::vector<InstructionBenchmark> &Points, double EpsilonSquared);
|
||||||
llvm::Error validateAndSetup();
|
llvm::Error validateAndSetup();
|
||||||
void dbScan(size_t MinPts);
|
void dbScan(size_t MinPts);
|
||||||
llvm::SmallVector<size_t, 0> rangeQuery(size_t Q) const;
|
void rangeQuery(size_t Q, llvm::SmallVectorImpl<size_t> &Scratchpad) const;
|
||||||
|
|
||||||
const std::vector<InstructionBenchmark> &Points_;
|
const std::vector<InstructionBenchmark> &Points_;
|
||||||
const double EpsilonSquared_;
|
const double EpsilonSquared_;
|
||||||
|
|
Loading…
Reference in New Issue