//===-- Analyze benchmark JSON files --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This code analyzes the json file produced by the `automemcpy` binary.
//
// As a reminder, `automemcpy` benchmarks each autogenerated memory function
// against one of the predefined distributions available in the
// `libc/benchmarks/distributions` folder.
//
// It works as follows:
// - Reads one or more json files.
// - If there are several runs for the same function and distribution, picks
//   the median throughput (aka `BytesPerSecond`).
// - Aggregates the throughput per distribution and scores them from worst (0)
//   to best (1).
// - Each distribution categorizes each function into one of the following
//   categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE,
//   BAD.
// - A process similar to the Majority Judgment voting system is used to
//   `elect` the best function. The histogram of grades is returned so we can
//   distinguish between functions with the same final grade. In the following
//   example both functions grade EXCELLENT but we may prefer the second one.
//
// |            | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ...
// |------------|-----------|-----------|------|----------| ...
// | Function_1 |     7     |     1     |  2   |          | ...
// | Function_2 |     6     |     4     |      |          | ...

#include "automemcpy/ResultAnalyzer.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include <numeric>
|
|
#include <unordered_map>
|
|
|
|
namespace llvm {

namespace automemcpy {

StringRef Grade::getString(const GradeEnum &GE) {
  switch (GE) {
  case EXCELLENT:
    return "EXCELLENT";
  case VERY_GOOD:
    return "VERY_GOOD";
  case GOOD:
    return "GOOD";
  case PASSABLE:
    return "PASSABLE";
  case INADEQUATE:
    return "INADEQUATE";
  case MEDIOCRE:
    return "MEDIOCRE";
  case BAD:
    return "BAD";
  case ARRAY_SIZE:
    report_fatal_error("logic error");
  }
  llvm_unreachable("fully covered switch");
}

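// Buckets a normalized score in [0, 1] into one of the seven grades by
// splitting the range into seven equal slices, from BAD (lowest) to
// EXCELLENT (highest). For example, a score of 0.9 is at least 6/7 and
// grades EXCELLENT.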
Grade::GradeEnum Grade::judge(double Score) {
  if (Score >= 6. / 7)
    return EXCELLENT;
  if (Score >= 5. / 7)
    return VERY_GOOD;
  if (Score >= 4. / 7)
    return GOOD;
  if (Score >= 3. / 7)
    return PASSABLE;
  if (Score >= 2. / 7)
    return INADEQUATE;
  if (Score >= 1. / 7)
    return MEDIOCRE;
  return BAD;
}

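// Computes the unbiased sample variance: the sum of squared deviations from
// the sample mean, divided by (N - 1) rather than N (Bessel's correction).
// A single sample has no spread, so its variance is defined as 0 here.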
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

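// Fills in the derived statistics (mean, unbiased variance and median) for
// one set of throughput samples. Note that std::nth_element partially sorts
// the samples in place, and that for an even number of samples the upper
// median is used rather than the average of the two middle values.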
static void processPerDistributionData(PerDistributionData &Data) {
  auto &Samples = Data.BytesPerSecondSamples;
  assert(!Samples.empty());
  // Sample Mean
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  Data.BytesPerSecondMean = Sum / Samples.size();
  // Unbiased Sample Variance
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
  // Median
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  Data.BytesPerSecondMedian = Samples[HalfSize];
}

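// Groups the raw samples per function and per distribution, then derives the
// per-distribution statistics. The early `break` assumes that ITERATION
// samples come first in the input; anything after the first non-ITERATION
// sample (e.g. aggregates) is ignored.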
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    if (S.Type != SampleType::ITERATION)
      break;
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }

  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}

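// Scores each function's median throughput against the other functions of
// the same type (e.g. memcpy against memcpy) on the same distribution, using
// min-max normalization: the worst function gets 0, the best gets 1.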
void fillScores(MutableArrayRef<FunctionData> Functions) {
  // A key to bucket throughput per function type and distribution.
  struct Key {
    FunctionType Type;
    StringRef Distribution;

    COMPARABLE_AND_HASHABLE(Key, Type, Distribution)
  };

  // Tracks minimum and maximum values.
  struct MinMax {
    double Min = std::numeric_limits<double>::max();
    // Note: lowest(), not min(); min() is the smallest *positive* double and
    // would be a wrong initial maximum for non-positive values.
    double Max = std::numeric_limits<double>::lowest();
    void update(double Value) {
      if (Value < Min)
        Min = Value;
      if (Value > Max)
        Max = Value;
    }
    double normalize(double Value) const { return (Value - Min) / (Max - Min); }
  };

  std::unordered_map<Key, MinMax, Key::Hasher> ThroughputMinMax;
  for (const auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      ThroughputMinMax[K].update(Throughput);
    }
  }

  for (auto &Function : Functions) {
    const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
      const auto &Distribution = Pair.getKey();
      const double Throughput = Pair.getValue().BytesPerSecondMedian;
      const Key K{Type, Distribution};
      Function.PerDistributionData[Distribution].Score =
          ThroughputMinMax[K].normalize(Throughput);
    }
  }
}

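// Turns per-distribution scores into grades and elects each function's final
// grade as the median of its grade histogram, as in Majority Judgment voting:
// each distribution is one voter, and taking the median keeps a few outlying
// distributions from dominating the result.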
void castVotes(MutableArrayRef<FunctionData> Functions) {
  for (FunctionData &Function : Functions) {
    // ScoresGeoMean accumulates the plain product of the scores. As long as
    // every function is graded over the same set of distributions, the
    // product ranks functions in the same order as the geometric mean would.
    Function.ScoresGeoMean = 1.0;
    for (const auto &Pair : Function.PerDistributionData) {
      const StringRef Distribution = Pair.getKey();
      const double Score = Pair.getValue().Score;
      Function.ScoresGeoMean *= Score;
      const auto G = Grade::judge(Score);
      ++(Function.GradeHisto[G]);
      Function.PerDistributionData[Distribution].Grade = G;
    }
  }

  // The final grade is the median of the grade histogram: walk the histogram
  // from best to worst grade and stop as soon as half of the votes have been
  // counted.
  for (FunctionData &Function : Functions) {
    const auto &GradeHisto = Function.GradeHisto;
    const size_t Votes =
        std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U);
    const size_t MedianVote = Votes / 2;
    size_t CountedVotes = 0;
    Grade::GradeEnum MedianGrade = Grade::BAD;
    for (size_t I = 0; I < GradeHisto.size(); ++I) {
      CountedVotes += GradeHisto[I];
      if (CountedVotes > MedianVote) {
        MedianGrade = Grade::GradeEnum(I);
        break;
      }
    }
    Function.FinalGrade = MedianGrade;
  }
}

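// A minimal sketch of how the pieces above are meant to compose, assuming the
// `Sample` vector has already been deserialized from the benchmark JSON files
// elsewhere:
//
//   std::vector<FunctionData> Functions = getThroughputs(Samples);
//   fillScores(Functions); // Normalized per-distribution scores.
//   castVotes(Functions);  // Per-distribution grades and a FinalGrade each.
//
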
} // namespace automemcpy
} // namespace llvm