[libc][automemcpy] Add mean/variance and simplify implementation

Differential Revision: https://reviews.llvm.org/D120031
This commit is contained in:
Guillaume Chatelet 2022-02-17 10:56:25 +00:00
parent 4846568191
commit b254a2a703
3 changed files with 60 additions and 33 deletions

View File

@ -49,9 +49,12 @@ struct FunctionId {
};
// Statistics gathered for one (function, distribution) pair.
// Fix: the merged diff left both the removed members (MedianBytesPerSecond,
// and duplicate Score/Grade declarations) and the new ones in place, which
// does not compile; keep only the new-side definition.
struct PerDistributionData {
  std::vector<double> BytesPerSecondSamples; // Raw throughput samples.
  double BytesPerSecondMedian;   // Median of samples for this distribution.
  double BytesPerSecondMean;     // Mean of samples for this distribution.
  double BytesPerSecondVariance; // Variance of samples for this distribution.
  double Score;                  // Normalized score for this distribution.
  Grade::GradeEnum Grade;        // Grade for this distribution.
};
struct FunctionData {

View File

@ -76,29 +76,48 @@ Grade::GradeEnum Grade::judge(double Score) {
return BAD;
}
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher>
BucketedSamples;
for (const auto &S : Samples)
BucketedSamples[S.Id].push_back(S.BytesPerSecond);
std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
Throughputs;
for (auto &Pair : BucketedSamples) {
const auto &Id = Pair.first;
auto &Values = Pair.second;
const size_t HalfSize = Values.size() / 2;
std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end());
const double MedianValue = Values[HalfSize];
Throughputs[Id.Function][Id.Distribution.Name] = MedianValue;
/// Returns the unbiased sample variance of `Samples` given their precomputed
/// mean, i.e. sum((s - mean)^2) / (N - 1). A single sample has, by
/// definition, a variance of zero. `Samples` must not be empty.
static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
                                            const double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  // Left fold of the squared deviations from the mean.
  const double SumOfSquaredDiffs = std::accumulate(
      Samples.begin(), Samples.end(), 0.0,
      [SampleMean](double Acc, double S) {
        const double Delta = S - SampleMean;
        return Acc + Delta * Delta;
      });
  // Bessel's correction: divide by N - 1 rather than N.
  return SumOfSquaredDiffs / (Samples.size() - 1);
}
/// Derives the mean, unbiased variance and median fields of `Data` from its
/// raw `BytesPerSecondSamples`, which must not be empty. Note that computing
/// the median partially reorders the samples vector in place.
static void processPerDistributionData(PerDistributionData &Data) {
  std::vector<double> &Values = Data.BytesPerSecondSamples;
  assert(!Values.empty());
  const size_t Count = Values.size();
  // Sample mean.
  double Total = 0.0;
  for (const double V : Values)
    Total += V;
  Data.BytesPerSecondMean = Total / Count;
  // Unbiased sample variance, using the mean computed above.
  Data.BytesPerSecondVariance =
      computeUnbiasedSampleVariance(Values, Data.BytesPerSecondMean);
  // Median: only a partial sort up to the middle element is needed.
  const size_t Middle = Count / 2;
  std::nth_element(Values.begin(), Values.begin() + Middle, Values.end());
  Data.BytesPerSecondMedian = Values[Middle];
}
/// Buckets all samples per (function, distribution) pair and computes the
/// per-distribution statistics (median / mean / variance of bytes per
/// second), returning one FunctionData per function.
/// Fix: the merged diff left the removed-side output loop (iterating a
/// `Throughputs` map that no longer exists) interleaved with the new loop;
/// keep only the new-side implementation.
std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
  // Group raw samples by function, then by distribution name.
  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
  for (const auto &S : Samples) {
    auto &Function = Functions[S.Id.Function];
    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
  }
  // Derive the statistics and flatten the map into the output vector.
  std::vector<FunctionData> Output;
  for (auto &[FunctionId, Function] : Functions) {
    Function.Id = FunctionId;
    for (auto &Pair : Function.PerDistributionData)
      processPerDistributionData(Pair.second);
    Output.push_back(std::move(Function));
  }
  return Output;
}
@ -130,7 +149,7 @@ void fillScores(MutableArrayRef<FunctionData> Functions) {
const FunctionType Type = Function.Id.Type;
for (const auto &Pair : Function.PerDistributionData) {
const auto &Distribution = Pair.getKey();
const double Throughput = Pair.getValue().MedianBytesPerSecond;
const double Throughput = Pair.getValue().BytesPerSecondMedian;
const Key K{Type, Distribution};
ThroughputMinMax[K].update(Throughput);
}
@ -140,7 +159,7 @@ void fillScores(MutableArrayRef<FunctionData> Functions) {
const FunctionType Type = Function.Id.Type;
for (const auto &Pair : Function.PerDistributionData) {
const auto &Distribution = Pair.getKey();
const double Throughput = Pair.getValue().MedianBytesPerSecond;
const double Throughput = Pair.getValue().BytesPerSecondMedian;
const Key K{Type, Distribution};
Function.PerDistributionData[Distribution].Score =
ThroughputMinMax[K].normalize(Throughput);

View File

@ -10,6 +10,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
using testing::DoubleNear;
using testing::ElementsAre;
using testing::Pair;
using testing::SizeIs;
@ -31,8 +32,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) {
EXPECT_THAT(Data[0].Id, Foo1);
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
// A single value is provided.
EXPECT_THAT(
Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 4);
const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
EXPECT_THAT(DistributionData.BytesPerSecondMedian, 4);
EXPECT_THAT(DistributionData.BytesPerSecondMean, 4);
EXPECT_THAT(DistributionData.BytesPerSecondVariance, 0);
}
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
@ -48,8 +51,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
// When multiple values are provided we pick the median one (here median of 4,
// 5, 5).
EXPECT_THAT(
Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 5);
const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
EXPECT_THAT(DistributionData.BytesPerSecondMedian, 5);
EXPECT_THAT(DistributionData.BytesPerSecondMean, DoubleNear(4.6, 0.1));
EXPECT_THAT(DistributionData.BytesPerSecondVariance, DoubleNear(0.33, 0.01));
}
TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
@ -86,11 +91,11 @@ TEST(AutomemcpyJsonResultsAnalyzer, getScore) {
[](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });
EXPECT_THAT(Data[0].Id, Foo1);
EXPECT_THAT(Data[0].PerDistributionData.lookup("A").MedianBytesPerSecond, 1);
EXPECT_THAT(Data[0].PerDistributionData.lookup("A").BytesPerSecondMedian, 1);
EXPECT_THAT(Data[1].Id, Foo2);
EXPECT_THAT(Data[1].PerDistributionData.lookup("A").MedianBytesPerSecond, 2);
EXPECT_THAT(Data[1].PerDistributionData.lookup("A").BytesPerSecondMedian, 2);
EXPECT_THAT(Data[2].Id, Foo3);
EXPECT_THAT(Data[2].PerDistributionData.lookup("A").MedianBytesPerSecond, 3);
EXPECT_THAT(Data[2].PerDistributionData.lookup("A").BytesPerSecondMedian, 3);
// Normalizes throughput per distribution.
fillScores(Data);