Add hashing of the .text section to ProcessMinidump.

Breakpad will always have a UUID for binaries when it creates minidump files. If an ELF file has a GNU build ID, it will use that. If it doesn't, it will create one by hashing up to the first 4096 bytes of the .text section. LLDB was not able to load these binaries, even when we had the right binary, because the UUID didn't match: LLDB uses the GNU build ID as the main UUID for a binary and falls back to a 4 byte CRC if the binary doesn't have one. With this fix, we check for the Breakpad hash or the Facebook hash (a modified version of the Breakpad hash that collides a bit less) and accept binaries when one of these hashes matches.

Differential Revision: https://reviews.llvm.org/D86261
This commit is contained in:
Greg Clayton 2020-08-10 15:07:47 -07:00
parent 44ee9d070a
commit 0e6c9a6e79
6 changed files with 218 additions and 0 deletions

View File

@ -121,6 +121,72 @@ private:
lldb::addr_t m_base;
lldb::addr_t m_size;
};
/// Duplicate the HashElfTextSection() from the breakpad sources.
///
/// Breakpad, a Google crash log reporting tool suite, creates minidump files
/// for many different architectures. When using Breakpad to create ELF
/// minidumps, it will check for a GNU build ID when creating a minidump file
/// and if one doesn't exist in the file, it will say the UUID of the file is a
/// checksum of up to the first 4096 bytes of the .text section. Facebook also
/// uses breakpad and modified this hash to avoid collisions so we can
/// calculate and check for this as well.
///
/// The breakpad code might end up hashing up to 15 bytes that immediately
/// follow the .text section in the file, so this code must do exactly what it
/// does so we can get an exact match for the UUID.
///
/// If the module has no section list, no ".text" section, or the section has
/// no backing object file, both output vectors are left untouched.
///
/// \param[in] module_sp The module to grab the .text section from.
///
/// \param[out] breakpad_uuid A vector that will receive the calculated
///                           breakpad .text hash (16 bytes).
///
/// \param[out] facebook_uuid A vector that will receive the calculated
///                           facebook .text hash (16 bytes).
///
void HashElfTextSection(ModuleSP module_sp, std::vector<uint8_t> &breakpad_uuid,
                        std::vector<uint8_t> &facebook_uuid) {
  SectionList *sect_list = module_sp->GetSectionList();
  if (sect_list == nullptr)
    return;
  SectionSP sect_sp = sect_list->FindSectionByName(ConstString(".text"));
  if (!sect_sp)
    return;
  // Without an object file there are no bytes to hash.
  auto *obj_file = sect_sp->GetObjectFile();
  if (obj_file == nullptr)
    return;
  constexpr size_t kMDGUIDSize = 16;
  constexpr size_t kBreakpadPageSize = 4096;
  // The breakpad code has a bug where it might access beyond the end of a
  // .text section by up to 15 bytes, so we must ensure we round up to the
  // next kMDGUIDSize byte boundary.
  DataExtractor data;
  const size_t text_size = sect_sp->GetFileSize();
  const size_t read_size = std::min<size_t>(
      llvm::alignTo(text_size, kMDGUIDSize), kBreakpadPageSize);
  obj_file->GetData(sect_sp->GetFileOffset(), read_size, data);
  breakpad_uuid.assign(kMDGUIDSize, 0);
  facebook_uuid.assign(kMDGUIDSize, 0);
  // The only difference between the breakpad hash and the facebook hash is the
  // hashing of the text section size into the hash prior to hashing the .text
  // contents.
  for (size_t i = 0; i < kMDGUIDSize; i++)
    facebook_uuid[i] ^= text_size % 255;
  // This code carefully duplicates how the hash was created in Breakpad
  // sources, including the error where it might hash an extra 15 bytes past
  // the end of the .text section if the .text section is less than a page size
  // in length.
  const uint8_t *ptr = data.GetDataStart();
  const uint8_t *ptr_end = data.GetDataEnd();
  while (ptr < ptr_end) {
    // The extractor may hold fewer than read_size bytes (e.g. the rounded-up
    // read ran past the end of the file), so the last chunk can be shorter
    // than kMDGUIDSize. Only touch bytes we actually own; skipping a missing
    // byte is the same as XOR-ing in a zero.
    const size_t chunk_size =
        std::min<size_t>(kMDGUIDSize, static_cast<size_t>(ptr_end - ptr));
    for (size_t i = 0; i < chunk_size; i++) {
      breakpad_uuid[i] ^= ptr[i];
      facebook_uuid[i] ^= ptr[i];
    }
    ptr += chunk_size;
  }
}
} // namespace
ConstString ProcessMinidump::GetPluginNameStatic() {
@ -494,10 +560,33 @@ void ProcessMinidump::ReadModuleList() {
const bool match = dmp_bytes.empty() || mod_bytes.empty() ||
mod_bytes.take_front(dmp_bytes.size()) == dmp_bytes;
if (!match) {
// Breakpad generates minidump files, and if there is no GNU build
// ID in the binary, it will calculate a UUID by hashing the first 4096
// bytes of the .text section and using that as the UUID for a module
// in the minidump. Facebook uses a modified breakpad client that
// uses a slightly modified version of this hash to avoid collisions.
// Check for UUIDs from the minidump that match these cases and accept
// the module we find if they do match.
std::vector<uint8_t> breakpad_uuid;
std::vector<uint8_t> facebook_uuid;
HashElfTextSection(module_sp, breakpad_uuid, facebook_uuid);
if (dmp_bytes == llvm::ArrayRef<uint8_t>(breakpad_uuid)) {
LLDB_LOG(log, "Breakpad .text hash match for {0}.", name);
} else if (dmp_bytes == llvm::ArrayRef<uint8_t>(facebook_uuid)) {
LLDB_LOG(log, "Facebook .text hash match for {0}.", name);
} else {
// The UUID wasn't a partial match and didn't match the .text hash
// so remove the module from the target, we will need to create a
// placeholder object file.
GetTarget().GetImages().Remove(module_sp);
module_sp.reset();
}
} else {
LLDB_LOG(log, "Partial uuid match for {0}.", name);
}
}
} else {
LLDB_LOG(log, "Full uuid match for {0}.", name);
}
if (module_sp) {
// Watch out for place holder modules that have different paths, but the

View File

@ -179,6 +179,69 @@ class MiniDumpUUIDTestCase(TestBase):
"/invalid/path/on/current/system/libuuidmismatch.so",
"7295E17C-6668-9E05-CBB5-DEE5003865D5")
def test_breakpad_hash_match(self):
    """
        Match a module by its Breakpad .text hash.

        When an executable has no ELF build ID, Breakpad writes a CvRecord
        with a PDB70 signature whose UUID is a hash that Breakpad computes
        over the .text section.

        Given a minidump carrying such a 16 byte UUID, verify that a symbol
        file without an ELF build ID is still associated with the module by
        hashing its .text section.
    """
    lib_path = self.getBuildArtifact("libbreakpad.so")
    self.yaml2obj("libbreakpad.yaml", lib_path)
    # Let LLDB find the freshly built library next to the build artifacts.
    search_dir = os.path.dirname(lib_path)
    self.dbg.HandleCommand(
        'settings set target.exec-search-paths "%s"' % (search_dir))
    minidump_modules = self.get_minidump_modules(
        "linux-arm-breakpad-uuid-match.yaml")
    self.assertEqual(1, len(minidump_modules))
    # With no build ID present LLDB makes up its own UUID for the binary,
    # so that is the value the located module is expected to report.
    self.verify_module(minidump_modules[0], lib_path, "D9C480E8")
def test_breakpad_overflow_hash_match(self):
    """
        Match a module whose .text hash spills into the next section.

        This is similar to test_breakpad_hash_match, but the .text section
        does not end on a 16 byte boundary, so the hash overflows into the
        next section's data by up to 15 bytes. Breakpad does exactly this,
        and this test verifies that we reproduce it and still match.
    """
    lib_path = self.getBuildArtifact("libbreakpad.so")
    self.yaml2obj("libbreakpad-overflow.yaml", lib_path)
    # Let LLDB find the freshly built library next to the build artifacts.
    search_dir = os.path.dirname(lib_path)
    self.dbg.HandleCommand(
        'settings set target.exec-search-paths "%s"' % (search_dir))
    minidump_modules = self.get_minidump_modules(
        "linux-arm-breakpad-uuid-match.yaml")
    self.assertEqual(1, len(minidump_modules))
    # With no build ID present LLDB makes up its own UUID for the binary,
    # so that is the value the located module is expected to report.
    self.verify_module(minidump_modules[0], lib_path, "48EB9FD7")
def test_facebook_hash_match(self):
    """
        Match a module by the Facebook variant of the Breakpad .text hash.

        When an executable has no ELF build ID, Breakpad writes a CvRecord
        with a PDB70 signature whose UUID is a hash of the .text section.
        Facebook modified this hash to avoid collisions.

        Given a minidump carrying such a 16 byte UUID, verify that a symbol
        file without an ELF build ID is still associated with the module by
        hashing its .text section the way Facebook does.
    """
    lib_path = self.getBuildArtifact("libbreakpad.so")
    self.yaml2obj("libbreakpad.yaml", lib_path)
    # Let LLDB find the freshly built library next to the build artifacts.
    search_dir = os.path.dirname(lib_path)
    self.dbg.HandleCommand(
        'settings set target.exec-search-paths "%s"' % (search_dir))
    minidump_modules = self.get_minidump_modules(
        "linux-arm-facebook-uuid-match.yaml")
    self.assertEqual(1, len(minidump_modules))
    # With no build ID present LLDB makes up its own UUID for the binary,
    # so that is the value the located module is expected to report.
    self.verify_module(minidump_modules[0], lib_path, "D9C480E8")
def test_relative_module_name(self):
old_cwd = os.getcwd()
self.addTearDownHook(lambda: os.chdir(old_cwd))

View File

@ -0,0 +1,21 @@
# ELF input for test_breakpad_overflow_hash_match.
#
# .text holds a single byte, so a hash that consumes 16 byte chunks runs 15
# bytes past its end. The .data section is meant to supply those 15 bytes:
# .text + .data contents concatenated are
# 04 000000 14000000 03000000 474E5500, the 16 byte UUID stored in
# linux-arm-breakpad-uuid-match.yaml.
--- !ELF
FileHeader:
  Class:           ELFCLASS32
  Data:            ELFDATA2LSB
  Type:            ET_DYN
  Machine:         EM_ARM
  Flags:           [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
Sections:
  - Name:            .text
    Type:            SHT_PROGBITS
    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
    Address:         0x0000000000010000
    AddressAlign:    0x0000000000000001
    Content:         04
  - Name:            .data
    Type:            SHT_PROGBITS
    Flags:           [ SHF_ALLOC, SHF_WRITE ]
    Address:         0x0000000000010001
    AddressAlign:    0x0000000000000001
    Content:         0000001400000003000000474E5500

View File

@ -0,0 +1,15 @@
# ELF input for test_breakpad_hash_match and test_facebook_hash_match.
#
# .text is exactly 16 bytes, so XOR-folding it in 16 byte chunks yields the
# section contents themselves. That is the UUID stored in
# linux-arm-breakpad-uuid-match.yaml.
--- !ELF
FileHeader:
  Class:           ELFCLASS32
  Data:            ELFDATA2LSB
  Type:            ET_DYN
  Machine:         EM_ARM
  Flags:           [ EF_ARM_SOFT_FLOAT, EF_ARM_EABI_VER5 ]
Sections:
  - Name:            .text
    Type:            SHT_PROGBITS
    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
    Address:         0x0000000000010000
    AddressAlign:    0x0000000000000004
    Content:         040000001400000003000000474E5500

View File

@ -0,0 +1,15 @@
# Minidump with a single module whose UUID is a Breakpad .text hash.
# Used by test_breakpad_hash_match and test_breakpad_overflow_hash_match.
--- !minidump
Streams:
- Type: SystemInfo
Processor Arch: ARM
Platform ID: Linux
CSD Version: '15E216'
CPU:
CPUID: 0x00000000
- Type: ModuleList
Modules:
- Base of Image: 0x0000000000001000
Size of Image: 0x00001000
Module Name: '/invalid/path/on/current/system/libbreakpad.so'
# Record layout: 52534453 (ASCII "RSDS") followed by the 16 byte UUID
# 04000000 14000000 03000000 474e5500, which equals the .text contents of
# libbreakpad.yaml. The trailing 5 zero bytes are presumably the age field
# and an empty, NUL-terminated PDB name -- confirm against the CodeView spec.
CodeView Record: 52534453040000001400000003000000474e55000000000000
...

View File

@ -0,0 +1,15 @@
# Minidump with a single module whose UUID is the Facebook variant of the
# Breakpad .text hash. Used by test_facebook_hash_match.
--- !minidump
Streams:
- Type: SystemInfo
Processor Arch: ARM
Platform ID: Linux
CSD Version: '15E216'
CPU:
CPUID: 0x00000000
- Type: ModuleList
Modules:
- Base of Image: 0x0000000000001000
Size of Image: 0x00001000
Module Name: '/invalid/path/on/current/system/libbreakpad.so'
# Record layout: 52534453 (ASCII "RSDS") followed by the 16 byte UUID
# 14101010 04101010 13101010 575e4510. Each byte is the matching .text byte
# of libbreakpad.yaml XOR 0x10, i.e. the 16 byte section size (16 % 255)
# folded into every position before the contents are hashed. The trailing
# 5 zero bytes are presumably the age field and an empty, NUL-terminated PDB
# name -- confirm against the CodeView spec.
CodeView Record: 52534453141010100410101013101010575e45100000000000
...