forked from OSchip/llvm-project
PGO: Scale large counters down to 32-bits
PGO counters are 64-bit and branch weights are 32-bit. Scale them down when necessary, instead of just taking the lower 32 bits. <rdar://problem/16276448> llvm-svn: 203592
This commit is contained in:
parent
f164d9404d
commit
38402dc917
|
@ -872,29 +872,59 @@ void CodeGenPGO::destroyRegionCounters() {
|
|||
delete RegionCounts;
|
||||
}
|
||||
|
||||
/// \brief Calculate what to divide by to scale weights.
///
/// Given the maximum weight, calculate a divisor that will scale all the
/// weights to strictly less than UINT32_MAX.
///
/// \param MaxWeight the largest 64-bit count in the set being scaled.
/// \returns 1 when \p MaxWeight is already below UINT32_MAX; otherwise
/// MaxWeight / UINT32_MAX + 1, which keeps every quotient below UINT32_MAX so
/// that adding 1 afterwards (see \a scaleBranchWeight()) still fits in 32 bits.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}
|
||||
|
||||
/// \brief Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \param Weight the 64-bit count to scale.
/// \param Scale the non-zero divisor to apply.
/// \returns Weight / Scale + 1, narrowed to 32 bits.
///
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
/// less than \c Weight; otherwise the scaled value may not fit in 32 bits.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  uint64_t Scaled = Weight / Scale + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  // The assert above guarantees the narrowing is lossless.
  return static_cast<uint32_t>(Scaled);
}
|
||||
|
||||
/// Build branch-weight metadata for a two-way branch from 64-bit counts.
///
/// Both counts are divided by one shared scale derived from the larger of the
/// two, so their ratio is kept while each result fits in 32 bits; 1 is added
/// to each scaled value by \a scaleBranchWeight().
///
/// \param TrueCount profile count for the taken side.
/// \param FalseCount profile count for the not-taken side.
/// \returns null when both counts are zero, otherwise the node produced by
/// MDBuilder::createBranchWeights for the two scaled weights.
llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
                                              uint64_t FalseCount) {
  // Check for empty weights.
  if (!TrueCount && !FalseCount)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
                                      scaleBranchWeight(FalseCount, Scale));
}
|
||||
|
||||
/// Build branch-weight metadata from a list of 64-bit counts.
///
/// All counts are divided by one shared scale derived from the largest
/// element, so their ratios are kept while each result fits in 32 bits; 1 is
/// added to each scaled value by \a scaleBranchWeight().
///
/// \param Weights the 64-bit counts, one per successor.
/// \returns null when fewer than two weights are given, otherwise the node
/// produced by MDBuilder::createBranchWeights for the scaled weights.
llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
  // We need at least two elements to create meaningful weights.
  if (Weights.size() < 2)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(*std::max_element(Weights.begin(),
                                                          Weights.end()));

  SmallVector<uint32_t, 16> ScaledWeights;
  ScaledWeights.reserve(Weights.size());
  for (uint64_t W : Weights)
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(ScaledWeights);
}
|
||||
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
main 8
|
||||
1
|
||||
68719476720
|
||||
64424509425
|
||||
68719476720
|
||||
21474836475
|
||||
21474836475
|
||||
21474836475
|
||||
4294967295
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
// Test that big branch weights get scaled down to 32-bits, rather than just
|
||||
// truncated.
|
||||
|
||||
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-counter-overflows.c %s -o - -emit-llvm -fprofile-instr-use=%S/Inputs/c-counter-overflows.profdata | FileCheck %s
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// PGOGEN: @[[MAIN:__llvm_pgo_ctr[0-9]*]] = private global [2 x i64] zeroinitializer
|
||||
int main(int argc, const char *argv[]) {
|
||||
// Need counts higher than 32-bits. In the arithmetic notes below, each line
// shows count / scale + 1, and the value after "=>" is that 32-bit result as
// it is printed when read as a signed i32 in the expected metadata.
|
||||
// CHECK: br {{.*}} !prof ![[FOR:[0-9]+]]
|
||||
// max = 0xffffffff0
|
||||
// scale = 0xffffffff0 / 0xffffffff + 1 = 17
|
||||
// loop-body: 0xffffffff0 / 17 + 1 = 0xf0f0f0f0 + 1 = 4042322161 => -252645135
|
||||
// loop-exit: 0x000000001 / 17 + 1 = 0x00000000 + 1 = 1 => 1
|
||||
for (uint64_t I = 0; I < 0xffffffff0; ++I) {
|
||||
// max = 0xffffffff * 15 = 0xefffffff1
|
||||
// scale = 0xefffffff1 / 0xffffffff + 1 = 16
|
||||
// CHECK: br {{.*}} !prof ![[IF:[0-9]+]]
|
||||
if (I & 0xf) {
|
||||
// 0xefffffff1 / 16 + 1 = 0xefffffff + 1 = 4026531840 => -268435456
|
||||
} else {
|
||||
// 0x0ffffffff / 16 + 1 = 0x0fffffff + 1 = 268435456 => 268435456
|
||||
}
|
||||
|
||||
// max = 0xffffffff * 5 = 0x4fffffffb
|
||||
// scale = 0x4fffffffb / 0xffffffff + 1 = 6
|
||||
// CHECK: ], !prof ![[SWITCH:[0-9]+]]
|
||||
switch ((I & 0xf) / 5) {
|
||||
case 0:
|
||||
// 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883
|
||||
break;
|
||||
case 1:
|
||||
// 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883
|
||||
break;
|
||||
case 2:
|
||||
// 0x4fffffffb / 6 + 1 = 0xd5555554 + 1 = 3579139413 => -715827883
|
||||
break;
|
||||
default:
|
||||
// 0x0ffffffff / 6 + 1 = 0x2aaaaaaa + 1 = 715827883 => 715827883
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK-DAG: ![[FOR]] = metadata !{metadata !"branch_weights", i32 -252645135, i32 1}
|
||||
// CHECK-DAG: ![[IF]] = metadata !{metadata !"branch_weights", i32 -268435456, i32 268435456}
|
||||
// CHECK-DAG: ![[SWITCH]] = metadata !{metadata !"branch_weights", i32 715827883, i32 -715827883, i32 -715827883, i32 -715827883}
|
Loading…
Reference in New Issue