[libc][automemcpy] Add mean/variance and simplify implementation
Differential Revision: https://reviews.llvm.org/D120031
parent 4846568191
commit b254a2a703

@@ -49,9 +49,12 @@ struct FunctionId {
 };
 
 struct PerDistributionData {
-  double MedianBytesPerSecond; // Median of samples for this distribution.
-  double Score;                // Normalized score for this distribution.
-  Grade::GradeEnum Grade;      // Grade for this distribution.
+  std::vector<double> BytesPerSecondSamples;
+  double BytesPerSecondMedian;   // Median of samples for this distribution.
+  double BytesPerSecondMean;     // Mean of samples for this distribution.
+  double BytesPerSecondVariance; // Variance of samples for this distribution.
+  double Score;                  // Normalized score for this distribution.
+  Grade::GradeEnum Grade;        // Grade for this distribution.
 };
 
 struct FunctionData {

@@ -76,29 +76,48 @@ Grade::GradeEnum Grade::judge(double Score) {
   return BAD;
 }
 
-std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
-  std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher>
-      BucketedSamples;
-  for (const auto &S : Samples)
-    BucketedSamples[S.Id].push_back(S.BytesPerSecond);
-  std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
-      Throughputs;
-  for (auto &Pair : BucketedSamples) {
-    const auto &Id = Pair.first;
-    auto &Values = Pair.second;
-    const size_t HalfSize = Values.size() / 2;
-    std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end());
-    const double MedianValue = Values[HalfSize];
-    Throughputs[Id.Function][Id.Distribution.Name] = MedianValue;
+static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
+                                            const double SampleMean) {
+  assert(!Samples.empty());
+  if (Samples.size() == 1)
+    return 0;
+  double DiffSquaresSum = 0;
+  for (const double S : Samples) {
+    const double Diff = S - SampleMean;
+    DiffSquaresSum += Diff * Diff;
+  }
+  return DiffSquaresSum / (Samples.size() - 1);
+}
+
+static void processPerDistributionData(PerDistributionData &Data) {
+  auto &Samples = Data.BytesPerSecondSamples;
+  assert(!Samples.empty());
+  // Sample Mean
+  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
+  Data.BytesPerSecondMean = Sum / Samples.size();
+  // Unbiased Sample Variance
+  Data.BytesPerSecondVariance =
+      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
+  // Median
+  const size_t HalfSize = Samples.size() / 2;
+  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
+  Data.BytesPerSecondMedian = Samples[HalfSize];
+}
+
+std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
+  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
+  for (const auto &S : Samples) {
+    auto &Function = Functions[S.Id.Function];
+    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
+    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
   }
+
   std::vector<FunctionData> Output;
-  for (auto &Pair : Throughputs) {
-    FunctionData Data;
-    Data.Id = Pair.first;
-    for (const auto &Pair : Pair.second)
-      Data.PerDistributionData[Pair.getKey()].MedianBytesPerSecond =
-          Pair.getValue();
-    Output.push_back(std::move(Data));
+  for (auto &[FunctionId, Function] : Functions) {
+    Function.Id = FunctionId;
+    for (auto &Pair : Function.PerDistributionData)
+      processPerDistributionData(Pair.second);
+    Output.push_back(std::move(Function));
   }
   return Output;
 }
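
As a side note, the three statistics introduced here are easy to exercise in isolation. The standalone sketch below mirrors the patch's helpers on a toy sample set; the function names (mean, unbiasedSampleVariance, median) and the main driver are illustrative only, not part of the change:

// Standalone sketch mirroring the helpers above (illustrative, not from the
// patch). Compiles with any C++17 compiler.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <numeric>
#include <vector>

// Arithmetic mean, as computed in processPerDistributionData.
static double mean(const std::vector<double> &Samples) {
  assert(!Samples.empty());
  return std::accumulate(Samples.begin(), Samples.end(), 0.0) / Samples.size();
}

// Unbiased sample variance: squared deviations divided by (n - 1), with the
// single-sample case pinned to 0, as in computeUnbiasedSampleVariance.
static double unbiasedSampleVariance(const std::vector<double> &Samples,
                                     double SampleMean) {
  assert(!Samples.empty());
  if (Samples.size() == 1)
    return 0;
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - SampleMean;
    DiffSquaresSum += Diff * Diff;
  }
  return DiffSquaresSum / (Samples.size() - 1);
}

// Median via std::nth_element; note that for an even sample count this picks
// the upper of the two middle values rather than averaging them, which is the
// same behavior as the patch.
static double median(std::vector<double> Samples) {
  assert(!Samples.empty());
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  return Samples[HalfSize];
}

int main() {
  const std::vector<double> BytesPerSecond = {4, 5, 5};
  const double Mean = mean(BytesPerSecond);
  std::printf("mean=%.3f variance=%.3f median=%.1f\n", Mean,
              unbiasedSampleVariance(BytesPerSecond, Mean),
              median(BytesPerSecond)); // mean=4.667 variance=0.333 median=5.0
  return 0;
}
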
@@ -130,7 +149,7 @@ void fillScores(MutableArrayRef<FunctionData> Functions) {
     const FunctionType Type = Function.Id.Type;
     for (const auto &Pair : Function.PerDistributionData) {
       const auto &Distribution = Pair.getKey();
-      const double Throughput = Pair.getValue().MedianBytesPerSecond;
+      const double Throughput = Pair.getValue().BytesPerSecondMedian;
       const Key K{Type, Distribution};
       ThroughputMinMax[K].update(Throughput);
     }

@@ -140,7 +159,7 @@ void fillScores(MutableArrayRef<FunctionData> Functions) {
     const FunctionType Type = Function.Id.Type;
    for (const auto &Pair : Function.PerDistributionData) {
       const auto &Distribution = Pair.getKey();
-      const double Throughput = Pair.getValue().MedianBytesPerSecond;
+      const double Throughput = Pair.getValue().BytesPerSecondMedian;
       const Key K{Type, Distribution};
       Function.PerDistributionData[Distribution].Score =
           ThroughputMinMax[K].normalize(Throughput);
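
Both fillScores hunks are a pure rename (MedianBytesPerSecond becomes BytesPerSecondMedian); the scoring logic itself is untouched. Assuming ThroughputMinMax[K].normalize performs the usual min-max rescaling its name suggests, the score of a throughput T within a (FunctionType, Distribution) bucket is:

  Score(T) = (T - Tmin) / (Tmax - Tmin)

which maps the slowest implementation in the bucket to 0 and the fastest to 1.
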
@@ -10,6 +10,7 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using testing::DoubleNear;
 using testing::ElementsAre;
 using testing::Pair;
 using testing::SizeIs;

@@ -31,8 +32,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) {
   EXPECT_THAT(Data[0].Id, Foo1);
   EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
   // A single value is provided.
-  EXPECT_THAT(
-      Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 4);
+  const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
+  EXPECT_THAT(DistributionData.BytesPerSecondMedian, 4);
+  EXPECT_THAT(DistributionData.BytesPerSecondMean, 4);
+  EXPECT_THAT(DistributionData.BytesPerSecondVariance, 0);
 }
 
 TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
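
The single-sample expectations line up with the new helpers: with n = 1 the median and the mean are both the lone sample (here 4), and the Samples.size() == 1 early return in computeUnbiasedSampleVariance defines the variance as 0, since the n - 1 divisor would otherwise be zero.
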
@@ -48,8 +51,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
   EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
   // When multiple values are provided we pick the median one (here median of 4,
   // 5, 5).
-  EXPECT_THAT(
-      Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 5);
+  const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
+  EXPECT_THAT(DistributionData.BytesPerSecondMedian, 5);
+  EXPECT_THAT(DistributionData.BytesPerSecondMean, DoubleNear(4.6, 0.1));
+  EXPECT_THAT(DistributionData.BytesPerSecondVariance, DoubleNear(0.33, 0.01));
 }
 
 TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
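
The DoubleNear bounds follow directly from the three samples {4, 5, 5}:

  mean     = (4 + 5 + 5) / 3 = 14/3 ≈ 4.67
  variance = ((4 - 14/3)^2 + 2 * (5 - 14/3)^2) / (3 - 1) = (2/3) / 2 = 1/3 ≈ 0.33

and std::nth_element selects index 3 / 2 = 1 of the sorted {4, 5, 5}, i.e. a median of 5.
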
@@ -86,11 +91,11 @@ TEST(AutomemcpyJsonResultsAnalyzer, getScore) {
       [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });
 
   EXPECT_THAT(Data[0].Id, Foo1);
-  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").MedianBytesPerSecond, 1);
+  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").BytesPerSecondMedian, 1);
   EXPECT_THAT(Data[1].Id, Foo2);
-  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").MedianBytesPerSecond, 2);
+  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").BytesPerSecondMedian, 2);
   EXPECT_THAT(Data[2].Id, Foo3);
-  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").MedianBytesPerSecond, 3);
+  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").BytesPerSecondMedian, 3);
 
   // Normalizes throughput per distribution.
   fillScores(Data);