jGetLoadedDynamicLibrariesInfos can inspect machos not yet loaded

jGetLoadedDynamicLibrariesInfos normally checks with dyld to find
the list of binaries loaded in the inferior, and getting the filepath,
before trying to parse the Mach-O binary in inferior memory.
This allows for debugserver to parse a Mach-O binary present in memory,
but not yet registered with dyld.  This patch also adds some simple
sanity checks that we're reading a Mach-O header before we begin
stepping through load commands, because we won't have the sanity check
of consulting dyld for the list of loaded binaries before parsing.
Also adds a testcase.

Differential Revision: https://reviews.llvm.org/D128956
rdar://95737734
This commit is contained in:
Jason Molenda 2022-07-08 14:01:15 -07:00
parent df90d22704
commit 77a38f6839
5 changed files with 236 additions and 87 deletions

View File

@ -0,0 +1,3 @@
C_SOURCES = main.c
include Makefile.rules

View File

@ -0,0 +1,47 @@
"""Test that debugserver will parse a mach-o in inferior memory even if it's not loaded."""
import os
import re
import subprocess
import lldb
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil
class TestUnregisteredMacho(TestBase):
# newer debugserver required for jGetLoadedDynamicLibrariesInfos
# to support this
@skipIfOutOfTreeDebugserver
@no_debug_info_test
@skipUnlessDarwin
def test(self):
self.build()
target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(
self, "// break here", lldb.SBFileSpec("main.c"))
frame = thread.GetFrameAtIndex(0)
macho_buf = frame.GetValueForVariablePath("macho_buf")
macho_addr = macho_buf.GetValueAsUnsigned()
invalid_macho_addr = macho_buf.GetValueAsUnsigned() + 4
gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % macho_addr
# Send the jGetLoadedDynamicLibrariesInfos packet
# to debugserver, asking it to parse the mach-o binary
# at this address and give us the UUID etc, even though
# dyld doesn't think there is a binary at that address.
# We won't get a pathname for the binary (from dyld), but
# we will get to the LC_UUID and include that.
self.expect (gdb_packet, substrs=['"pathname":""', '"uuid":"1B4E28BA-2FA1-11D2-883F-B9A761BDE3FB"'])
no_macho_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % invalid_macho_addr
self.expect (no_macho_gdb_packet, substrs=['response: {"images":[]}'])
# Test that we get back the information for the properly
# formatted Mach-O binary in memory, but do not get an
# entry for the invalid Mach-O address.
both_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d,%d]}]'" % (macho_addr, invalid_macho_addr)
self.expect (both_gdb_packet, substrs=['"load_address":%d,' % macho_addr])
self.expect (both_gdb_packet, substrs=['"load_address":%d,' % invalid_macho_addr], matching=False)

View File

@ -0,0 +1,63 @@
#include <mach-o/loader.h>
#include <mach/machine.h>
#include <stdlib.h>
#include <string.h>
#include <uuid/uuid.h>
int main() {
int size_of_load_cmds =
sizeof(struct segment_command_64) + sizeof(struct uuid_command);
uint8_t *macho_buf =
(uint8_t *)malloc(sizeof(struct mach_header_64) + size_of_load_cmds);
uint8_t *p = macho_buf;
struct mach_header_64 mh;
mh.magic = MH_MAGIC_64;
mh.cputype = CPU_TYPE_ARM64;
mh.cpusubtype = 0;
mh.filetype = MH_EXECUTE;
mh.ncmds = 2;
mh.sizeofcmds = size_of_load_cmds;
mh.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE;
memcpy(p, &mh, sizeof(mh));
p += sizeof(mh);
struct segment_command_64 seg;
seg.cmd = LC_SEGMENT_64;
seg.cmdsize = sizeof(seg);
strcpy(seg.segname, "__TEXT");
seg.vmaddr = 0x5000;
seg.vmsize = 0x1000;
seg.fileoff = 0;
seg.filesize = 0;
seg.maxprot = 0;
seg.initprot = 0;
seg.nsects = 0;
seg.flags = 0;
memcpy(p, &seg, sizeof(seg));
p += sizeof(seg);
struct uuid_command uuid;
uuid.cmd = LC_UUID;
uuid.cmdsize = sizeof(uuid);
uuid_clear(uuid.uuid);
uuid_parse("1b4e28ba-2fa1-11d2-883f-b9a761bde3fb", uuid.uuid);
memcpy(p, &uuid, sizeof(uuid));
p += sizeof(uuid);
// If this needs to be debugged, the memory buffer can be written
// to a file with
// (lldb) mem rea -b -o /tmp/t -c `p - macho_buf` macho_buf
// (lldb) platform shell otool -hlv /tmp/t
// to verify that it is well formed.
// And inside lldb, it should be inspectable via
// (lldb) script print(lldb.frame.locals["macho_buf"][0].GetValueAsUnsigned())
// 105553162403968
// (lldb) process plugin packet send
// 'jGetLoadedDynamicLibrariesInfos:{"solib_addresses":[105553162403968]}]'
return 0; // break here
}

View File

@ -73,9 +73,11 @@ public:
uint64_t load_address;
uint64_t mod_date; // may not be available - 0 if so
struct mach_o_information macho_info;
bool is_valid_mach_header;
binary_image_information()
: filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0) {}
: filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0),
is_valid_mach_header(false) {}
};
// Child process control

View File

@ -746,6 +746,16 @@ const char *MachProcess::GetPlatformString(unsigned char platform) {
return nullptr;
}
static bool mach_header_validity_test (uint32_t magic, uint32_t cputype) {
if (magic != MH_MAGIC && magic != MH_CIGAM && magic != MH_MAGIC_64 &&
magic != MH_CIGAM_64)
return false;
if (cputype != CPU_TYPE_X86_64 && cputype != CPU_TYPE_ARM &&
cputype != CPU_TYPE_ARM64 && cputype != CPU_TYPE_ARM64_32)
return false;
return true;
}
// Given an address, read the mach-o header and load commands out of memory to
// fill in
// the mach_o_information "inf" object.
@ -757,12 +767,16 @@ bool MachProcess::GetMachOInformationFromMemory(
uint32_t dyld_platform, nub_addr_t mach_o_header_addr, int wordsize,
struct mach_o_information &inf) {
uint64_t load_cmds_p;
if (wordsize == 4) {
struct mach_header header;
if (ReadMemory(mach_o_header_addr, sizeof(struct mach_header), &header) !=
sizeof(struct mach_header)) {
return false;
}
if (!mach_header_validity_test(header.magic, header.cputype))
return false;
load_cmds_p = mach_o_header_addr + sizeof(struct mach_header);
inf.mach_header.magic = header.magic;
inf.mach_header.cputype = header.cputype;
@ -779,6 +793,8 @@ bool MachProcess::GetMachOInformationFromMemory(
&header) != sizeof(struct mach_header_64)) {
return false;
}
if (!mach_header_validity_test(header.magic, header.cputype))
return false;
load_cmds_p = mach_o_header_addr + sizeof(struct mach_header_64);
inf.mach_header.magic = header.magic;
inf.mach_header.cputype = header.cputype;
@ -896,6 +912,8 @@ JSONGenerator::ObjectSP MachProcess::FormatDynamicLibrariesIntoJSON(
const size_t image_count = image_infos.size();
for (size_t i = 0; i < image_count; i++) {
if (!image_infos[i].is_valid_mach_header)
continue;
JSONGenerator::DictionarySP image_info_dict_sp(
new JSONGenerator::Dictionary());
image_info_dict_sp->AddIntegerItem("load_address",
@ -970,7 +988,6 @@ JSONGenerator::ObjectSP MachProcess::FormatDynamicLibrariesIntoJSON(
}
JSONGenerator::DictionarySP reply_sp(new JSONGenerator::Dictionary());
;
reply_sp->AddItem("images", image_infos_array_sp);
return reply_sp;
@ -985,8 +1002,8 @@ JSONGenerator::ObjectSP MachProcess::FormatDynamicLibrariesIntoJSON(
// information.
JSONGenerator::ObjectSP MachProcess::GetLoadedDynamicLibrariesInfos(
nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count) {
JSONGenerator::DictionarySP reply_sp;
JSONGenerator::ObjectSP empty_reply_sp(new JSONGenerator::Dictionary());
int pointer_size = GetInferiorAddrSize(pid);
std::vector<struct binary_image_information> image_infos;
@ -994,91 +1011,89 @@ JSONGenerator::ObjectSP MachProcess::GetLoadedDynamicLibrariesInfos(
uint8_t *image_info_buf = (uint8_t *)malloc(image_infos_size);
if (image_info_buf == NULL) {
return reply_sp;
return empty_reply_sp;
}
if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
image_infos_size) {
return reply_sp;
if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
image_infos_size) {
return empty_reply_sp;
}
/// First the image_infos array with (load addr, pathname, mod date)
/// tuples
for (size_t i = 0; i < image_count; i++) {
struct binary_image_information info;
nub_addr_t pathname_address;
if (pointer_size == 4) {
uint32_t load_address_32;
uint32_t pathname_address_32;
uint32_t mod_date_32;
::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
::memcpy(&pathname_address_32,
image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
::memcpy(&mod_date_32,
image_info_buf + (i * 3 * pointer_size) + pointer_size +
pointer_size,
4);
info.load_address = load_address_32;
info.mod_date = mod_date_32;
pathname_address = pathname_address_32;
} else {
uint64_t load_address_64;
uint64_t pathname_address_64;
uint64_t mod_date_64;
::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
::memcpy(&pathname_address_64,
image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
::memcpy(&mod_date_64,
image_info_buf + (i * 3 * pointer_size) + pointer_size +
pointer_size,
8);
info.load_address = load_address_64;
info.mod_date = mod_date_64;
pathname_address = pathname_address_64;
}
//// First the image_infos array with (load addr, pathname, mod date)
///tuples
for (size_t i = 0; i < image_count; i++) {
struct binary_image_information info;
nub_addr_t pathname_address;
if (pointer_size == 4) {
uint32_t load_address_32;
uint32_t pathname_address_32;
uint32_t mod_date_32;
::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
::memcpy(&pathname_address_32,
image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
::memcpy(&mod_date_32, image_info_buf + (i * 3 * pointer_size) +
pointer_size + pointer_size,
4);
info.load_address = load_address_32;
info.mod_date = mod_date_32;
pathname_address = pathname_address_32;
} else {
uint64_t load_address_64;
uint64_t pathname_address_64;
uint64_t mod_date_64;
::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
::memcpy(&pathname_address_64,
image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
::memcpy(&mod_date_64, image_info_buf + (i * 3 * pointer_size) +
pointer_size + pointer_size,
8);
info.load_address = load_address_64;
info.mod_date = mod_date_64;
pathname_address = pathname_address_64;
}
char strbuf[17];
info.filename = "";
uint64_t pathname_ptr = pathname_address;
bool still_reading = true;
while (still_reading &&
ReadMemory(pathname_ptr, sizeof(strbuf) - 1, strbuf) ==
sizeof(strbuf) - 1) {
strbuf[sizeof(strbuf) - 1] = '\0';
info.filename += strbuf;
pathname_ptr += sizeof(strbuf) - 1;
// Stop if we found nul byte indicating the end of the string
for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
if (strbuf[i] == '\0') {
still_reading = false;
break;
}
char strbuf[17];
info.filename = "";
uint64_t pathname_ptr = pathname_address;
bool still_reading = true;
while (still_reading && ReadMemory(pathname_ptr, sizeof(strbuf) - 1,
strbuf) == sizeof(strbuf) - 1) {
strbuf[sizeof(strbuf) - 1] = '\0';
info.filename += strbuf;
pathname_ptr += sizeof(strbuf) - 1;
// Stop if we found nul byte indicating the end of the string
for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
if (strbuf[i] == '\0') {
still_reading = false;
break;
}
}
uuid_clear(info.macho_info.uuid);
image_infos.push_back(info);
}
if (image_infos.size() == 0) {
return reply_sp;
uuid_clear(info.macho_info.uuid);
image_infos.push_back(info);
}
if (image_infos.size() == 0) {
return empty_reply_sp;
}
free(image_info_buf);
/// Second, read the mach header / load commands for all the dylibs
for (size_t i = 0; i < image_count; i++) {
// The SPI to provide platform is not available on older systems.
uint32_t platform = 0;
if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
pointer_size,
image_infos[i].macho_info)) {
image_infos[i].is_valid_mach_header = true;
}
}
free(image_info_buf);
/// Third, format all of the above in the JSONGenerator object.
//// Second, read the mach header / load commands for all the dylibs
for (size_t i = 0; i < image_count; i++) {
// The SPI to provide platform is not available on older systems.
uint32_t platform = 0;
if (!GetMachOInformationFromMemory(platform,
image_infos[i].load_address,
pointer_size,
image_infos[i].macho_info)) {
return reply_sp;
}
}
//// Third, format all of the above in the JSONGenerator object.
return FormatDynamicLibrariesIntoJSON(image_infos);
return reply_sp;
return FormatDynamicLibrariesIntoJSON(image_infos);
}
/// From dyld SPI header dyld_process_info.h
@ -1141,7 +1156,6 @@ void MachProcess::GetAllLoadedBinariesViaDYLDSPI(
// macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
JSONGenerator::ObjectSP
MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) {
JSONGenerator::DictionarySP reply_sp;
int pointer_size = GetInferiorAddrSize(pid);
std::vector<struct binary_image_information> image_infos;
@ -1149,8 +1163,11 @@ MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) {
uint32_t platform = GetPlatform();
const size_t image_count = image_infos.size();
for (size_t i = 0; i < image_count; i++) {
GetMachOInformationFromMemory(platform, image_infos[i].load_address,
pointer_size, image_infos[i].macho_info);
if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
pointer_size,
image_infos[i].macho_info)) {
image_infos[i].is_valid_mach_header = true;
}
}
return FormatDynamicLibrariesIntoJSON(image_infos);
}
@ -1160,10 +1177,11 @@ MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) {
// dyld SPIs that exist in macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
JSONGenerator::ObjectSP MachProcess::GetLibrariesInfoForAddresses(
nub_process_t pid, std::vector<uint64_t> &macho_addresses) {
JSONGenerator::DictionarySP reply_sp;
int pointer_size = GetInferiorAddrSize(pid);
// Collect the list of all binaries that dyld knows about in
// the inferior process.
std::vector<struct binary_image_information> all_image_infos;
GetAllLoadedBinariesViaDYLDSPI(all_image_infos);
uint32_t platform = GetPlatform();
@ -1171,19 +1189,35 @@ JSONGenerator::ObjectSP MachProcess::GetLibrariesInfoForAddresses(
std::vector<struct binary_image_information> image_infos;
const size_t macho_addresses_count = macho_addresses.size();
const size_t all_image_infos_count = all_image_infos.size();
for (size_t i = 0; i < macho_addresses_count; i++) {
bool found_matching_entry = false;
for (size_t j = 0; j < all_image_infos_count; j++) {
if (all_image_infos[j].load_address == macho_addresses[i]) {
image_infos.push_back(all_image_infos[j]);
found_matching_entry = true;
}
}
if (!found_matching_entry) {
// dyld doesn't think there is a binary at this address,
// but maybe there isn't a binary YET - let's look in memory
// for a proper mach-o header etc and return what we can.
// We will have an empty filename for the binary (because dyld
// doesn't know about it yet) but we can read all of the mach-o
// load commands from memory directly.
struct binary_image_information entry;
entry.load_address = macho_addresses[i];
image_infos.push_back(entry);
}
}
const size_t image_infos_count = image_infos.size();
for (size_t i = 0; i < image_infos_count; i++) {
GetMachOInformationFromMemory(platform,
image_infos[i].load_address, pointer_size,
image_infos[i].macho_info);
if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
pointer_size,
image_infos[i].macho_info)) {
image_infos[i].is_valid_mach_header = true;
}
}
return FormatDynamicLibrariesIntoJSON(image_infos);
}