Improve performance when parsing symbol tables in mach-o files.

Some larger projects were loading quite slowly with the current LLDB on macOS and macOS simulator builds. I did some instrument traces and found 3 main culprits:
- a LLDB timer that was put into a function that was called too often
- a std::set that was keeping track of the address of symbols that were already added
- a unnamed function generator in ObjectFile that was going slow due to allocations

In order to see this in action I ran the latest LLDB on a large application with many frameworks using the following method:

(lldb) script import time; start_time = time.perf_counter()
(lldb) file Large.app
(lldb) script print(time.perf_counter() - start_time)

I first range "sudo purge" to clear the system file caches to simulate a cold startup of the debugger, followed by two iterations with warm file caches.

Prior to this fix I was seeing the following timings:

17.68 (cold)
14.56 (warm 1)
14.52 (warm 2)

After this fix I was seeing:

11.32 (cold)
8.43 (warm 1)
8.49 (warm 2)

Differential Revision: https://reviews.llvm.org/D103504
This commit is contained in:
Greg Clayton 2021-06-01 21:52:31 -07:00
parent 8c948b18e9
commit b0572abf72
3 changed files with 30 additions and 19 deletions

View File

@ -440,8 +440,6 @@ CompUnitSP Module::GetCompileUnitAtIndex(size_t index) {
bool Module::ResolveFileAddress(lldb::addr_t vm_addr, Address &so_addr) {
std::lock_guard<std::recursive_mutex> guard(m_mutex);
LLDB_SCOPED_TIMERF("Module::ResolveFileAddress (vm_addr = 0x%" PRIx64 ")",
vm_addr);
SectionList *section_list = GetSectionList();
if (section_list)
return so_addr.ResolveAddressUsingFileSections(vm_addr, section_list);

View File

@ -44,6 +44,7 @@
#include "lldb/Host/SafeMachO.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
@ -1895,9 +1896,9 @@ public:
filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
Host::SystemLog(Host::eSystemLogError,
"error: unable to find section %d for a symbol in %s, corrupt file?\n",
n_sect,
filename.c_str());
"error: unable to find section %d for a symbol in "
"%s, corrupt file?\n",
n_sect, filename.c_str());
}
}
if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
@ -2183,7 +2184,16 @@ size_t ObjectFileMachO::ParseSymtab() {
// We add symbols to the table in the order of most information (nlist
// records) to least (function starts), and avoid duplicating symbols
// via this set.
std::set<addr_t> symbols_added;
llvm::DenseSet<addr_t> symbols_added;
// We are using a llvm::DenseSet for "symbols_added" so we must be sure we
// do not add the tombstone or empty keys to the set.
auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
// Don't add the tombstone or empty keys.
if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
return;
symbols_added.insert(file_addr);
};
FunctionStarts function_starts;
lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
uint32_t i;
@ -3640,9 +3650,9 @@ size_t ObjectFileMachO::ParseSymtab() {
symbol_section);
sym[GSYM_sym_idx].GetAddressRef().SetOffset(
symbol_value);
symbols_added.insert(sym[GSYM_sym_idx]
.GetAddress()
.GetFileAddress());
add_symbol_addr(sym[GSYM_sym_idx]
.GetAddress()
.GetFileAddress());
// We just need the flags from the linker
// symbol, so put these flags
// into the N_GSYM flags to avoid duplicate
@ -3662,7 +3672,7 @@ size_t ObjectFileMachO::ParseSymtab() {
if (set_value) {
sym[sym_idx].GetAddressRef().SetSection(symbol_section);
sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
symbols_added.insert(
add_symbol_addr(
sym[sym_idx].GetAddress().GetFileAddress());
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
@ -4475,7 +4485,7 @@ size_t ObjectFileMachO::ParseSymtab() {
// invalid address of zero when the global is a common symbol.
sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
symbols_added.insert(
add_symbol_addr(
sym[GSYM_sym_idx].GetAddress().GetFileAddress());
// We just need the flags from the linker symbol, so put these
// flags into the N_GSYM flags to avoid duplicate symbols in
@ -4494,7 +4504,8 @@ size_t ObjectFileMachO::ParseSymtab() {
if (set_value) {
sym[sym_idx].GetAddressRef().SetSection(symbol_section);
sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress());
if (symbol_section)
add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
if (nlist.n_desc & N_WEAK_REF)
@ -4590,7 +4601,7 @@ size_t ObjectFileMachO::ParseSymtab() {
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].SetExternal(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
symbols_added.insert(symbol_addr.GetFileAddress());
add_symbol_addr(symbol_addr.GetFileAddress());
if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
++sym_idx;
@ -4645,7 +4656,7 @@ size_t ObjectFileMachO::ParseSymtab() {
sym[sym_idx].SetType(eSymbolTypeCode);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
symbols_added.insert(symbol_addr.GetFileAddress());
add_symbol_addr(symbol_addr.GetFileAddress());
if (symbol_flags)
sym[sym_idx].SetFlags(symbol_flags);
if (symbol_byte_size)
@ -4746,7 +4757,7 @@ size_t ObjectFileMachO::ParseSymtab() {
sym[sym_idx].SetType(eSymbolTypeResolver);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].GetAddressRef() = so_addr;
symbols_added.insert(so_addr.GetFileAddress());
add_symbol_addr(so_addr.GetFileAddress());
sym[sym_idx].SetByteSize(symbol_stub_byte_size);
++sym_idx;
}

View File

@ -617,11 +617,13 @@ ObjectFile::GetSymbolTypeFromName(llvm::StringRef name,
}
ConstString ObjectFile::GetNextSyntheticSymbolName() {
StreamString ss;
llvm::SmallString<256> name;
llvm::raw_svector_ostream os(name);
ConstString file_name = GetModule()->GetFileSpec().GetFilename();
ss.Printf("___lldb_unnamed_symbol%u$$%s", ++m_synthetic_symbol_idx,
file_name.GetCString());
return ConstString(ss.GetString());
++m_synthetic_symbol_idx;
os << "___lldb_unnamed_symbol" << m_synthetic_symbol_idx << "$$"
<< file_name.GetStringRef();
return ConstString(os.str());
}
std::vector<ObjectFile::LoadableData>