Fix the encoding and decoding of UniqueCStringMap<T> objects when saved to cache files.

UniqueCStringMap<T> objects are a std::vector<UniqueCStringMap::Entry> objects where the Entry object contains a ConstString + T. The values in the vector are sorted first by ConstString and then by the T value. ConstString objects are simply uniqued "const char *" values and when we compare we use the actual string pointer as the value we sort by. This caused a problem when we saved the symbol table name indexes and debug info indexes to disk in one process when they were sorted, and then loaded them into another process when decoding them from the cache files. Why? Because the order in which the ConstString objects were created are now completely different and the string pointers will no longer be sorted in the new process the cache was loaded into.

The unit tests created for the initial patch didn't catch the encoding and decoding issues of UniqueCStringMap<T> because they were happening in the same process and encoding and decoding would end up createing sorted UniqueCStringMap<T> objects due to the constant string pool being exactly the same.

This patch does the sort and also reserves the right amount of entries in the UniqueCStringMap::m_map prior to adding them all to avoid doing multiple allocations.

Added a unit test that loads an object file from yaml, and then I created a cache file for the original file and removed the cache file's signature mod time check since we will generate an object file from the YAML, and use that as the object file for the Symtab object. Then we load the cache data from the array of symtab cache bytes so that the ConstString "const char *" values will not match the current process, and verify we can lookup the 4 names from the object file in the symbol table.

Differential Revision: https://reviews.llvm.org/D124572
This commit is contained in:
Greg Clayton 2022-04-27 14:53:45 -07:00
parent 8bdfc73f63
commit 268089b6ac
3 changed files with 436 additions and 0 deletions

View File

@ -100,6 +100,7 @@ bool NameToDIE::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
if (identifier != kIdentifierNameToDIE)
return false;
const uint32_t count = data.GetU32(offset_ptr);
m_map.Reserve(count);
for (uint32_t i = 0; i < count; ++i) {
llvm::StringRef str(strtab.Get(data.GetU32(offset_ptr)));
// No empty strings allowed in the name to DIE maps.
@ -110,6 +111,16 @@ bool NameToDIE::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
else
return false;
}
// We must sort the UniqueCStringMap after decoding it since it is a vector
// of UniqueCStringMap::Entry objects which contain a ConstString and type T.
// ConstString objects are sorted by "const char *" and then type T and
// the "const char *" are point values that will depend on the order in which
// ConstString objects are created and in which of the 256 string pools they
// are created in. So after we decode all of the entries, we must sort the
// name map to ensure name lookups succeed. If we encode and decode within
// the same process we wouldn't need to sort, so unit testing didn't catch
// this issue when first checked in.
m_map.Sort(std::less<DIERef>());
return true;
}

View File

@ -1204,6 +1204,7 @@ bool DecodeCStrMap(const DataExtractor &data, lldb::offset_t *offset_ptr,
if (identifier != kIdentifierCStrMap)
return false;
const uint32_t count = data.GetU32(offset_ptr);
cstr_map.Reserve(count);
for (uint32_t i=0; i<count; ++i)
{
llvm::StringRef str(strtab.Get(data.GetU32(offset_ptr)));
@ -1213,6 +1214,16 @@ bool DecodeCStrMap(const DataExtractor &data, lldb::offset_t *offset_ptr,
return false;
cstr_map.Append(ConstString(str), value);
}
// We must sort the UniqueCStringMap after decoding it since it is a vector
// of UniqueCStringMap::Entry objects which contain a ConstString and type T.
// ConstString objects are sorted by "const char *" and then type T and
// the "const char *" are point values that will depend on the order in which
// ConstString objects are created and in which of the 256 string pools they
// are created in. So after we decode all of the entries, we must sort the
// name map to ensure name lookups succeed. If we encode and decode within
// the same process we wouldn't need to sort, so unit testing didn't catch
// this issue when first checked in.
cstr_map.Sort();
return true;
}

View File

@ -303,3 +303,417 @@ LinkEditData:
module_symtab->PreloadSymbols();
EncodeDecode(*module_symtab);
}
TEST_F(SymtabTest, TestDecodeCStringMaps) {
// Symbol tables save out the symbols, but they also save out the symbol table
// name indexes. These name indexes are a map of sorted ConstString + T pairs
// and when they are decoded from a file, they are no longer sorted since
// ConstString objects can be sorted by "const char *" and the order in which
// these strings are created won't be the same in a new process. We need to
// ensure these name lookups happen correctly when we load the name indexes,
// so this test loads a symbol table from a cache file from
// "lldb/unittests/Symbol/Inputs/indexnames-symtab-cache" and make sure we
// can correctly lookup each of the names in the symbol table.
auto ExpectedFile = TestFile::fromYaml(R"(
--- !mach-o
FileHeader:
magic: 0xFEEDFACF
cputype: 0x100000C
cpusubtype: 0x0
filetype: 0x2
ncmds: 16
sizeofcmds: 744
flags: 0x200085
reserved: 0x0
LoadCommands:
- cmd: LC_SEGMENT_64
cmdsize: 72
segname: __PAGEZERO
vmaddr: 0
vmsize: 4294967296
fileoff: 0
filesize: 0
maxprot: 0
initprot: 0
nsects: 0
flags: 0
- cmd: LC_SEGMENT_64
cmdsize: 232
segname: __TEXT
vmaddr: 4294967296
vmsize: 16384
fileoff: 0
filesize: 16384
maxprot: 5
initprot: 5
nsects: 2
flags: 0
Sections:
- sectname: __text
segname: __TEXT
addr: 0x100003F64
size: 76
offset: 0x3F64
align: 2
reloff: 0x0
nreloc: 0
flags: 0x80000400
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: 80018052C0035FD6E0028052C0035FD640048052C0035FD6FF8300D1FD7B01A9FD43009108008052E80B00B9BFC31FB8F4FFFF97F5FFFF97F6FFFF97E00B40B9FD7B41A9FF830091C0035FD6
- sectname: __unwind_info
segname: __TEXT
addr: 0x100003FB0
size: 80
offset: 0x3FB0
align: 2
reloff: 0x0
nreloc: 0
flags: 0x0
reserved1: 0x0
reserved2: 0x0
reserved3: 0x0
content: 010000001C000000000000001C000000000000001C00000002000000643F00003400000034000000B13F00000000000034000000030000000C0002001400020000000001180000000000000400000002
- cmd: LC_SEGMENT_64
cmdsize: 72
segname: __LINKEDIT
vmaddr: 4294983680
vmsize: 16384
fileoff: 16384
filesize: 994
maxprot: 1
initprot: 1
nsects: 0
flags: 0
- cmd: LC_DYLD_CHAINED_FIXUPS
cmdsize: 16
dataoff: 16384
datasize: 56
- cmd: LC_DYLD_EXPORTS_TRIE
cmdsize: 16
dataoff: 16440
datasize: 80
- cmd: LC_SYMTAB
cmdsize: 24
symoff: 16528
nsyms: 25
stroff: 16928
strsize: 176
- cmd: LC_DYSYMTAB
cmdsize: 80
ilocalsym: 0
nlocalsym: 20
iextdefsym: 20
nextdefsym: 5
iundefsym: 25
nundefsym: 0
tocoff: 0
ntoc: 0
modtaboff: 0
nmodtab: 0
extrefsymoff: 0
nextrefsyms: 0
indirectsymoff: 0
nindirectsyms: 0
extreloff: 0
nextrel: 0
locreloff: 0
nlocrel: 0
- cmd: LC_LOAD_DYLINKER
cmdsize: 32
name: 12
Content: '/usr/lib/dyld'
ZeroPadBytes: 7
- cmd: LC_UUID
cmdsize: 24
uuid: 3E94866E-0D1A-39BD-975B-64E8F1FDBAAE
- cmd: LC_BUILD_VERSION
cmdsize: 32
platform: 1
minos: 786432
sdk: 787200
ntools: 1
Tools:
- tool: 3
version: 49938432
- cmd: LC_SOURCE_VERSION
cmdsize: 16
version: 0
- cmd: LC_MAIN
cmdsize: 24
entryoff: 16252
stacksize: 0
- cmd: LC_LOAD_DYLIB
cmdsize: 56
dylib:
name: 24
timestamp: 2
current_version: 85943299
compatibility_version: 65536
Content: '/usr/lib/libSystem.B.dylib'
ZeroPadBytes: 6
- cmd: LC_FUNCTION_STARTS
cmdsize: 16
dataoff: 16520
datasize: 8
- cmd: LC_DATA_IN_CODE
cmdsize: 16
dataoff: 16528
datasize: 0
- cmd: LC_CODE_SIGNATURE
cmdsize: 16
dataoff: 17104
datasize: 274
LinkEditData:
NameList:
- n_strx: 43
n_type: 0x64
n_sect: 0
n_desc: 0
n_value: 0
- n_strx: 91
n_type: 0x64
n_sect: 0
n_desc: 0
n_value: 0
- n_strx: 98
n_type: 0x66
n_sect: 0
n_desc: 1
n_value: 1651098491
- n_strx: 1
n_type: 0x2E
n_sect: 1
n_desc: 0
n_value: 4294983524
- n_strx: 152
n_type: 0x24
n_sect: 1
n_desc: 0
n_value: 4294983524
- n_strx: 1
n_type: 0x24
n_sect: 0
n_desc: 0
n_value: 8
- n_strx: 1
n_type: 0x4E
n_sect: 1
n_desc: 0
n_value: 8
- n_strx: 1
n_type: 0x2E
n_sect: 1
n_desc: 0
n_value: 4294983532
- n_strx: 157
n_type: 0x24
n_sect: 1
n_desc: 0
n_value: 4294983532
- n_strx: 1
n_type: 0x24
n_sect: 0
n_desc: 0
n_value: 8
- n_strx: 1
n_type: 0x4E
n_sect: 1
n_desc: 0
n_value: 8
- n_strx: 1
n_type: 0x2E
n_sect: 1
n_desc: 0
n_value: 4294983540
- n_strx: 162
n_type: 0x24
n_sect: 1
n_desc: 0
n_value: 4294983540
- n_strx: 1
n_type: 0x24
n_sect: 0
n_desc: 0
n_value: 8
- n_strx: 1
n_type: 0x4E
n_sect: 1
n_desc: 0
n_value: 8
- n_strx: 1
n_type: 0x2E
n_sect: 1
n_desc: 0
n_value: 4294983548
- n_strx: 167
n_type: 0x24
n_sect: 1
n_desc: 0
n_value: 4294983548
- n_strx: 1
n_type: 0x24
n_sect: 0
n_desc: 0
n_value: 52
- n_strx: 1
n_type: 0x4E
n_sect: 1
n_desc: 0
n_value: 52
- n_strx: 1
n_type: 0x64
n_sect: 1
n_desc: 0
n_value: 0
- n_strx: 2
n_type: 0xF
n_sect: 1
n_desc: 16
n_value: 4294967296
- n_strx: 22
n_type: 0xF
n_sect: 1
n_desc: 0
n_value: 4294983532
- n_strx: 27
n_type: 0xF
n_sect: 1
n_desc: 0
n_value: 4294983540
- n_strx: 32
n_type: 0xF
n_sect: 1
n_desc: 0
n_value: 4294983524
- n_strx: 37
n_type: 0xF
n_sect: 1
n_desc: 0
n_value: 4294983548
StringTable:
- ' '
- __mh_execute_header
- _bar
- _baz
- _foo
- _main
- '/Users/gclayton/Documents/objfiles/index-names/'
- main.c
- '/Users/gclayton/Documents/objfiles/index-names/main.o'
- _foo
- _bar
- _baz
- _main
- ''
- ''
- ''
FunctionStarts: [ 0x3F64, 0x3F6C, 0x3F74, 0x3F7C ]
...
)");
// This data was taken from a hex dump of the object file from the above yaml
// and hexdumped so we can load the cache data in this test.
const uint8_t symtab_cache_bytes[] = {
0x01, 0x10, 0x3e, 0x94, 0x86, 0x6e, 0x0d, 0x1a,
0x39, 0xbd, 0x97, 0x5b, 0x64, 0xe8, 0xf1, 0xfd,
0xba, 0xae, 0xff, 0x53, 0x54, 0x41, 0x42, 0x91,
0x00, 0x00, 0x00, 0x00, 0x2f, 0x55, 0x73, 0x65,
0x72, 0x73, 0x2f, 0x67, 0x63, 0x6c, 0x61, 0x79,
0x74, 0x6f, 0x6e, 0x2f, 0x44, 0x6f, 0x63, 0x75,
0x6d, 0x65, 0x6e, 0x74, 0x73, 0x2f, 0x6f, 0x62,
0x6a, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x2f, 0x69,
0x6e, 0x64, 0x65, 0x78, 0x2d, 0x6e, 0x61, 0x6d,
0x65, 0x73, 0x2f, 0x6d, 0x61, 0x69, 0x6e, 0x2e,
0x63, 0x00, 0x2f, 0x55, 0x73, 0x65, 0x72, 0x73,
0x2f, 0x67, 0x63, 0x6c, 0x61, 0x79, 0x74, 0x6f,
0x6e, 0x2f, 0x44, 0x6f, 0x63, 0x75, 0x6d, 0x65,
0x6e, 0x74, 0x73, 0x2f, 0x6f, 0x62, 0x6a, 0x66,
0x69, 0x6c, 0x65, 0x73, 0x2f, 0x69, 0x6e, 0x64,
0x65, 0x78, 0x2d, 0x6e, 0x61, 0x6d, 0x65, 0x73,
0x2f, 0x6d, 0x61, 0x69, 0x6e, 0x2e, 0x6f, 0x00,
0x66, 0x6f, 0x6f, 0x00, 0x62, 0x61, 0x72, 0x00,
0x62, 0x61, 0x7a, 0x00, 0x6d, 0x61, 0x69, 0x6e,
0x00, 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65,
0x63, 0x75, 0x74, 0x65, 0x5f, 0x68, 0x65, 0x61,
0x64, 0x65, 0x72, 0x00, 0x53, 0x59, 0x4d, 0x42,
0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x2a,
0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x64, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0a, 0x20, 0x01, 0x37, 0x00, 0x00, 0x00, 0x00,
0x7b, 0xc3, 0x69, 0x62, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x66, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x02, 0x32, 0x01, 0x6d, 0x00, 0x00,
0x00, 0x01, 0x64, 0x3f, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x08, 0x00,
0x00, 0x00, 0x00, 0x00, 0x02, 0x32, 0x01, 0x71,
0x00, 0x00, 0x00, 0x01, 0x6c, 0x3f, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00,
0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x32,
0x01, 0x75, 0x00, 0x00, 0x00, 0x01, 0x74, 0x3f,
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x0f, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x32, 0x01, 0x79, 0x00, 0x00, 0x00, 0x01,
0x7c, 0x3f, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x0f, 0x00, 0x14, 0x00, 0x00, 0x00,
0x00, 0x00, 0x04, 0x12, 0x02, 0x7e, 0x00, 0x00,
0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00, 0x64, 0x3f, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x10, 0x00, 0x0f, 0x00, 0x01, 0x00,
0x43, 0x4d, 0x41, 0x50, 0x07, 0x00, 0x00, 0x00,
0x6d, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x75, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x79, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x37, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x7e, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00
};
ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded());
auto module_sp = std::make_shared<Module>(ExpectedFile->moduleSpec());
ObjectFile *objfile = module_sp->GetObjectFile();
ASSERT_NE(objfile, nullptr);
// Test encoding and decoding an empty symbol table.
DataExtractor data(symtab_cache_bytes, sizeof(symtab_cache_bytes),
eByteOrderLittle, 8);
Symtab symtab(objfile);
offset_t data_offset = 0;
bool uuid_mismatch = false; // Gets set to true if signature doesn't match.
const bool success = symtab.Decode(data, &data_offset, uuid_mismatch);
ASSERT_EQ(success, true);
ASSERT_EQ(uuid_mismatch, false);
// Now make sure that name lookup works for all symbols. This indicates that
// the Symtab::NameToIndexMap was decoded correctly and works as expected.
Symbol *symbol = nullptr;
symbol = symtab.FindFirstSymbolWithNameAndType(ConstString("main"),
eSymbolTypeCode,
Symtab::eDebugAny,
Symtab::eVisibilityAny);
ASSERT_NE(symbol, nullptr);
symbol = symtab.FindFirstSymbolWithNameAndType(ConstString("foo"),
eSymbolTypeCode,
Symtab::eDebugAny,
Symtab::eVisibilityAny);
ASSERT_NE(symbol, nullptr);
symbol = symtab.FindFirstSymbolWithNameAndType(ConstString("bar"),
eSymbolTypeCode,
Symtab::eDebugAny,
Symtab::eVisibilityAny);
ASSERT_NE(symbol, nullptr);
symbol = symtab.FindFirstSymbolWithNameAndType(ConstString("baz"),
eSymbolTypeCode,
Symtab::eDebugAny,
Symtab::eVisibilityAny);
ASSERT_NE(symbol, nullptr);
}