[PGO] Fix computation of function Hash

And bump its version number accordingly.

This is a patched recommit of 7c298c104b

Previous hash implementation was incorrectly passing an uint64_t, that got converted
to an uint8_t, to finalize the hash computation. This led to different functions
having the same hash if they only differ by the remaining statements, which is
incorrect.

Added a new test case that trivially tests that a small function change is
reflected in the hash value.

Not that as this patch fixes the hash computation, it would invalidate all hashes
computed before that patch applies, this is why we bumped the version number.

Update profile data hash entries due to hash function update, except for binary
version, in which case we keep the buggy behavior for backward compatibility.

Differential Revision: https://reviews.llvm.org/D79961
This commit is contained in:
serge-sans-paille 2020-05-25 20:44:35 +02:00
parent 84cf8ed8fd
commit de02a75e39
15 changed files with 65 additions and 24 deletions

View File

@ -82,6 +82,10 @@ Non-comprehensive list of changes in this release
linker. If the user links the program with the ``clang`` or ``clang-cl``
drivers, the driver will pass this flag for them.
- Clang's profile files generated through ``-fprofile-instr-generate`` are using
a fixed hashing algorithm that prevents some collision when loading
out-of-date profile informations. Clang can still read old profile files.
New Compiler Flags
------------------

View File

@ -52,9 +52,10 @@ void CodeGenPGO::setFuncName(llvm::Function *Fn) {
enum PGOHashVersion : unsigned {
PGO_HASH_V1,
PGO_HASH_V2,
PGO_HASH_V3,
// Keep this set to the latest hash version.
PGO_HASH_LATEST = PGO_HASH_V2
PGO_HASH_LATEST = PGO_HASH_V3
};
namespace {
@ -122,7 +123,7 @@ public:
BinaryOperatorGE,
BinaryOperatorEQ,
BinaryOperatorNE,
// The preceding values are available with PGO_HASH_V2.
// The preceding values are available since PGO_HASH_V2.
// Keep this last. It's for the static assert that follows.
LastHashType
@ -144,7 +145,9 @@ static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
CodeGenModule &CGM) {
if (PGOReader->getVersion() <= 4)
return PGO_HASH_V1;
return PGO_HASH_V2;
if (PGOReader->getVersion() <= 5)
return PGO_HASH_V2;
return PGO_HASH_V3;
}
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
@ -288,7 +291,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
return PGOHash::BinaryOperatorLAnd;
if (BO->getOpcode() == BO_LOr)
return PGOHash::BinaryOperatorLOr;
if (HashVersion == PGO_HASH_V2) {
if (HashVersion >= PGO_HASH_V2) {
switch (BO->getOpcode()) {
default:
break;
@ -310,7 +313,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
}
}
if (HashVersion == PGO_HASH_V2) {
if (HashVersion >= PGO_HASH_V2) {
switch (S->getStmtClass()) {
default:
break;
@ -747,13 +750,21 @@ uint64_t PGOHash::finalize() {
return Working;
// Check for remaining work in Working.
if (Working)
MD5.update(Working);
if (Working) {
// Keep the buggy behavior from v1 and v2 for backward-compatibility. This
// is buggy because it converts a uint64_t into an array of uint8_t.
if (HashVersion < PGO_HASH_V3) {
MD5.update({(uint8_t)Working});
} else {
using namespace llvm::support;
uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
}
}
// Finalize the MD5 and return the hash.
llvm::MD5::MD5Result Result;
MD5.final(Result);
using namespace llvm::support;
return Result.low();
}

View File

@ -1,5 +1,5 @@
main
10111551811706059223
7779561829442898616
8
1
68719476720

Binary file not shown.

View File

@ -7,7 +7,7 @@ simple_loops
75
conditionals
4190663230902537370
4904767535850050386
11
1
100
@ -22,7 +22,7 @@ conditionals
100
early_exits
8265526549255474475
2880354649761471549
9
1
0
@ -35,7 +35,7 @@ early_exits
0
jumps
15872630527555456493
15051420506203462683
22
1
1
@ -61,7 +61,7 @@ jumps
9
switches
11892326508727782373
43242458792028222
19
1
1
@ -84,7 +84,7 @@ switches
0
big_switch
16933280399284440835
13144136522122330070
17
1
32
@ -117,7 +117,7 @@ boolean_operators
50
boolop_loops
11270260636676715317
12402604614320574815
9
1
50
@ -137,7 +137,7 @@ conditional_operator
1
do_fallthrough
6898770640283947069
8714614136504380050
4
1
10

View File

@ -1,5 +1,5 @@
never_called
5644096560937528444
6820425066224770721
9
0
0
@ -17,7 +17,7 @@ main
1
dead_code
9636018207904213947
5254464978620792806
10
1
0

View File

@ -1,5 +1,5 @@
_Z9range_forv
6169071350249721981
8789831523895825398
5
1
4

View File

@ -1,5 +1,5 @@
_Z6throwsv
340120998528097520
18172607911962830854
9
1
100

View File

@ -1,6 +1,6 @@
main
# Func Hash:
8712453512413296413
8734802134600123338
# Num Counters:
9
# Counter Values:

View File

@ -1,6 +1,6 @@
main
# Func Hash:
1965403898329309329
3721743393642630379
# Num Counters:
10
# Counter Values:

View File

@ -1,6 +1,6 @@
main
# Func Hash:
1965403898329309329
872687477373597607
# Num Counters:
9
# Counter Values:

View File

@ -0,0 +1,22 @@
// Test that a slight change in the code leads to a different hash.
// RUN: %clang_cc1 -UEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-NOEXTRA
// RUN: %clang_cc1 -DEXTRA -triple x86_64-unknown-linux-gnu -main-file-name c-collision.c %s -o - -emit-llvm -fprofile-instrument=clang | FileCheck %s --check-prefix=CHECK-EXTRA
// CHECK-NOEXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 7156072912471487002,
// CHECK-EXTRA: @__profd_foo = private global { {{.*}} } { i64 6699318081062747564, i64 -4383447408116050035,
extern int bar;
void foo() {
if (bar) {
}
if (bar) {
}
if (bar) {
if (bar) {
#ifdef EXTRA
if (bar) {
}
#endif
}
}
}

View File

@ -4,6 +4,7 @@
// RUN: llvm-profdata merge %S/Inputs/c-general.proftext -o %t.profdata
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%t.profdata | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v5 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v3 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s
// Also check compatibility with older profiles.
// RUN: %clang_cc1 -triple x86_64-apple-macosx10.9 -main-file-name c-general.c %s -o - -emit-llvm -fprofile-instrument-use-path=%S/Inputs/c-general.profdata.v1 | FileCheck -allow-deprecated-dag-overlap -check-prefix=PGOUSE %s

View File

@ -979,6 +979,9 @@ enum ProfVersion {
Version4 = 4,
// In this version, the frontend PGO stable hash algorithm defaults to V2.
Version5 = 5,
// In this version, the frontend PGO stable hash algorithm got fixed and
// may produce hashes different from Version5.
Version6 = 6,
// The current version is 5.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};

View File

@ -657,7 +657,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 5
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 5
#define INSTR_PROF_INDEX_VERSION 6
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 3