[PDB] Sort globals symbols by name in GSI hash buckets.

It seems like the debugger first computes a symbol's bucket,
and then does a binary search of the entries in that bucket using the
symbol's name in order to find it.  If the bucket entries are not
in sorted order, this obviously won't work.  After this patch, a
couple of simple test cases show that we generate an exactly
identical GSI hash stream, which is very nice.

llvm-svn: 336405
This commit is contained in:
Zachary Turner 2018-07-06 02:33:58 +00:00
parent 4832b9ea6b
commit 1f200adfa7
7 changed files with 112 additions and 5 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,42 @@
REQUIRES: diasdk
Input object file reconstruction:
; // main.cpp
; struct S {
; // Function names are chosen specifically to generate hash collisions in the
; // GSI hash table.
; virtual int A307() { return 102; }
; virtual int A400() { return 12; }
; virtual int A206() { return 201; }
; virtual int A105() { return 300; }
; };
;
; struct T : public S {
; int A105() override { return 300; }
; int A307() override { return 102; }
; int A206() override { return 201; }
; int A400() override { return 12; }
; };
;
; int main(int argc, char **argv) {
; T s;
; return s.A105() + s.A206() + s.A307() + s.A400();
; }
clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-vfunc-collision.obj
RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe %S/Inputs/globals-dia-vfunc-collision.obj
RUN: llvm-pdbutil pretty -classes %t.pdb | FileCheck %s
CHECK: struct T
CHECK: func [0x000010c0+ 0 - 0x000010dd-29 | sizeof= 29] (FPO) virtual int __cdecl A105()
CHECK: func [0x00001100+ 0 - 0x0000111b-27 | sizeof= 27] (FPO) virtual int __cdecl A307()
CHECK: func [0x000010e0+ 0 - 0x000010fd-29 | sizeof= 29] (FPO) virtual int __cdecl A206()
CHECK: func [0x00001120+ 0 - 0x0000113b-27 | sizeof= 27] (FPO) virtual int __cdecl A400()
CHECK: struct S
CHECK: func [0x00001160+ 0 - 0x0000116c-12 | sizeof= 12] (FPO) virtual int __cdecl A307()
CHECK: func [0x00001170+ 0 - 0x0000117c-12 | sizeof= 12] (FPO) virtual int __cdecl A400()
CHECK: func [0x00001180+ 0 - 0x0000118c-12 | sizeof= 12] (FPO) virtual int __cdecl A206()
CHECK: func [0x00001190+ 0 - 0x0000119c-12 | sizeof= 12] (FPO) virtual int __cdecl A105()

View File

@ -0,0 +1,25 @@
REQUIRES: diasdk
Input object file reconstruction:
; // main.cpp
; struct S {
; // Function names are chosen specifically to generate hash collisions in the
; // GSI hash table.
; virtual int A132() { return 102; }
; virtual int A1001() { return 300; }
; };
;
; int main(int argc, char **argv) {
; S s;
; return s.A132();
; }
clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-vfunc-collision2.obj
RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe %S/Inputs/globals-dia-vfunc-collision2.obj
RUN: llvm-pdbutil pretty -classes %t.pdb | FileCheck %s
CHECK: struct S
CHECK: func [0x00001060+ 0 - 0x0000106c-12 | sizeof= 12] (FPO) virtual int __cdecl A132()
CHECK: func [0x00001070+ 0 - 0x0000107c-12 | sizeof= 12] (FPO) virtual int __cdecl A1001()

View File

@ -0,0 +1,26 @@
REQUIRES: diasdk
Input object file reconstruction:
; // main.cpp
; struct Base {
; virtual int V2() { return 42; }
; };
;
; struct Derived : public Base {
; int V2() override { return 42; }
; };
;
; int main()
; {
; Derived D;
; return D.V2();
; }
clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-vfunc-simple.obj
RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe %S/Inputs/globals-dia-vfunc-simple.obj
RUN: llvm-pdbutil pretty -classes %t.pdb | FileCheck %s
CHECK: func [0x00001070+ 0 - 0x0000107c-12 | sizeof= 12] (FPO) virtual int __cdecl V2()
CHECK: func [0x000010a0+ 0 - 0x000010ac-12 | sizeof= 12] (FPO) virtual int __cdecl V2()

View File

@ -83,7 +83,8 @@ Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
} }
void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
std::array<std::vector<PSHashRecord>, IPHR_HASH + 1> TmpBuckets; std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
TmpBuckets;
uint32_t SymOffset = RecordZeroOffset; uint32_t SymOffset = RecordZeroOffset;
for (const CVSymbol &Sym : Records) { for (const CVSymbol &Sym : Records) {
PSHashRecord HR; PSHashRecord HR;
@ -94,8 +95,7 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
// Hash the name to figure out which bucket this goes into. // Hash the name to figure out which bucket this goes into.
StringRef Name = getSymbolName(Sym); StringRef Name = getSymbolName(Sym);
size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
TmpBuckets[BucketIdx].push_back(HR); // FIXME: Does order matter? TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
SymOffset += Sym.length(); SymOffset += Sym.length();
} }
@ -117,8 +117,22 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
ulittle32_t ChainStartOff = ulittle32_t ChainStartOff =
ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
HashBuckets.push_back(ChainStartOff); HashBuckets.push_back(ChainStartOff);
for (const auto &HR : Bucket)
HashRecords.push_back(HR); // Sort each bucket by symbol name: shorter names first, equal-length names by comparing their contents.
std::sort(Bucket.begin(), Bucket.end(),
[](const std::pair<StringRef, PSHashRecord> &Left,
const std::pair<StringRef, PSHashRecord> &Right) {
size_t LS = Left.first.size();
size_t RS = Right.first.size();
if (LS < RS)
return true;
if (LS > RS)
return false;
return Left.first < Right.first;
});
for (const auto &Entry : Bucket)
HashRecords.push_back(Entry.second);
} }
} }