Set a default number of address bits on Darwin arm64 systems

With arm64e ARMv8.3 pointer authentication, lldb needs to know how
many bits are used for addressing and how many are used for pointer
auth signing.  This should be determined dynamically from the inferior
system / corefile, but there are some workflows where it still isn't
recorded and we fall back on a default value that is correct on some
Darwin environments.

This patch also explicitly sets the vendor of mach-o binaries to
Apple, so we select an Apple ABI instead of a random other ABI.

It also adds a function pointer formatter for systems where pointer
authentication is in use: when a function pointer does not resolve to
a symbol, we strip the ptrauth bits off of the address and, if the
stripped value points to an actual symbol, print that value as well.

Differential Revision: https://reviews.llvm.org/D115431
rdar://84644661
This commit is contained in:
Jason Molenda 2021-12-09 22:43:39 -08:00
parent e5d104b280
commit 223e8ca026
8 changed files with 240 additions and 13 deletions

View File

@ -9,6 +9,7 @@
#include "lldb/DataFormatters/CXXFunctionPointer.h"
#include "lldb/Core/ValueObject.h"
#include "lldb/Target/ABI.h"
#include "lldb/Target/SectionLoadList.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/Stream.h"
@ -38,8 +39,34 @@ bool lldb_private::formatters::CXXFunctionPointerSummaryProvider(
Address so_addr;
Target *target = exe_ctx.GetTargetPtr();
if (target && !target->GetSectionLoadList().IsEmpty()) {
if (target->GetSectionLoadList().ResolveLoadAddress(func_ptr_address,
so_addr)) {
target->GetSectionLoadList().ResolveLoadAddress(func_ptr_address,
so_addr);
if (so_addr.GetSection() == nullptr) {
// If we have an address that doesn't correspond to any symbol,
// it might have authentication bits. Strip them & see if it
// now points to a symbol -- if so, do the SymbolContext lookup
// based on the stripped address.
// If we find a symbol with the ptrauth bits stripped, print the
// raw value into the stream, and replace the Address with the
// one that points to a symbol for a fuller description.
if (Process *process = exe_ctx.GetProcessPtr()) {
if (ABISP abi_sp = process->GetABI()) {
addr_t fixed_addr = abi_sp->FixCodeAddress(func_ptr_address);
if (fixed_addr != func_ptr_address) {
Address test_address;
test_address.SetLoadAddress(fixed_addr, target);
if (test_address.GetSection() != nullptr) {
int addrsize = target->GetArchitecture().GetAddressByteSize();
sstr.Printf("actual=0x%*.*" PRIx64 " ", addrsize * 2,
addrsize * 2, fixed_addr);
so_addr = test_address;
}
}
}
}
}
if (so_addr.IsValid()) {
so_addr.Dump(&sstr, exe_ctx.GetBestExecutionContextScope(),
Address::DumpStyleResolvedDescription,
Address::DumpStyleSectionNameOffset);

View File

@ -817,6 +817,16 @@ ValueObjectSP ABIMacOSX_arm64::GetReturnValueObjectImpl(
lldb::addr_t ABIMacOSX_arm64::FixAddress(addr_t pc, addr_t mask) {
  // Bit 55 of the incoming value decides how the masked bits are filled:
  // when it is set they are all forced to 1, otherwise all forced to 0.
  const lldb::addr_t sign_select_bit = 1ULL << 55;

  // Historically Darwin could not discover the number of addressing bits
  // at runtime, so the most common configuration was baked in.  Most
  // environments now report the real value, but a few still pass no mask
  // at all and depend on this fallback: 39 bits of addressing (0..38),
  // i.e. every bit from 39 upward is treated as non-address.
  const lldb::addr_t effective_mask =
      (mask != 0) ? mask : ~((1ULL << 39) - 1);

  if ((pc & sign_select_bit) != 0)
    return pc | effective_mask;
  return pc & ~effective_mask;
}

View File

@ -5154,17 +5154,7 @@ void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
}
if (!found_any) {
if (header.filetype == MH_KEXT_BUNDLE) {
base_triple.setVendor(llvm::Triple::Apple);
add_triple(base_triple);
} else {
// We didn't find a LC_VERSION_MIN load command and this isn't a KEXT
// so lets not say our Vendor is Apple, leave it as an unspecified
// unknown.
base_triple.setVendor(llvm::Triple::UnknownVendor);
base_triple.setVendorName(llvm::StringRef());
add_triple(base_triple);
}
add_triple(base_triple);
}
}

View File

@ -859,6 +859,7 @@ bool ArchSpec::SetArchitecture(ArchitectureType arch_type, uint32_t cpu,
m_triple.setArchName(llvm::StringRef(core_def->name));
if (arch_type == eArchTypeMachO) {
m_triple.setVendor(llvm::Triple::Apple);
m_triple.setObjectFormat(llvm::Triple::MachO);
// Don't set the OS. It could be simulator, macosx, ios, watchos,
// tvos, bridgeos. We could get close with the cpu type - but we

View File

@ -0,0 +1,12 @@
C_SOURCES := main.c
# Build both a.out (from C_SOURCES) and the create-corefile helper.
# create-corefile will create a custom corefile using the symbol
# addresses from the a.out binary.
all: a.out create-corefile
# create-corefile is built by re-invoking make on $(MAKEFILE_RULES) with
# EXE and C_SOURCES overridden for that one invocation.
create-corefile:
$(MAKE) -f $(MAKEFILE_RULES) EXE=create-corefile \
C_SOURCES=create-corefile.c
include Makefile.rules

View File

@ -0,0 +1,50 @@
"""Test that lldb has a default mask for addressable bits on Darwin arm64 ABI"""
import os
import re
import subprocess
import lldb
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil
class TestCorefileDefaultPtrauth(TestBase):
    """Check that a ptrauth-signed function pointer read from a corefile
    with no addressable-bits metadata is still resolved to its symbol."""

    mydir = TestBase.compute_mydir(__file__)

    @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM")
    @skipIf(archs=no_match(['arm64','arm64e']))
    @skipUnlessDarwin
    def test_lc_note(self):
        self.build()
        self.test_exe = self.getBuildArtifact("a.out")
        self.create_corefile = self.getBuildArtifact("create-corefile")
        self.corefile = self.getBuildArtifact("core")

        # Create our corefile.  Pass the arguments as a list (no shell) so
        # build paths containing spaces or shell metacharacters still work,
        # and fail right here if the helper could not produce the corefile
        # instead of failing later with a less obvious LoadCore error.
        retcode = subprocess.call(
            [self.create_corefile, self.test_exe, self.corefile])
        self.assertEqual(retcode, 0,
                         "create-corefile failed to write " + self.corefile)

        # This corefile has no metadata telling us how many bits are
        # used for ptrauth signed function pointers.  We will need lldb
        # to fall back on its old default value for Darwin arm64 ABIs
        # to correctly strip the bits.
        self.target = self.dbg.CreateTarget('')
        self.process = self.target.LoadCore(self.corefile)
        self.assertTrue(self.process.IsValid())

        # Add the test binary to the target and load it at address 0.
        modspec = lldb.SBModuleSpec()
        modspec.SetFileSpec(lldb.SBFileSpec(self.test_exe, True))
        m = self.target.AddModule(modspec)
        self.assertTrue(m.IsValid())
        self.target.SetModuleLoadAddress(m, 0)

        # target variable should show us both the actual function
        # pointer with ptrauth bits and the symbol it resolves to,
        # with the ptrauth bits stripped, e.g.
        # (int (*)(...)) fmain = 0xe46bff0100003f90 (actual=0x0000000100003f90 a.out`main at main.c:3)
        self.expect("target variable fmain",
                    substrs=['fmain = 0x', '(actual=0x', 'main at main.c'])

View File

@ -0,0 +1,131 @@
#include <mach-o/loader.h>
#include <stdio.h>
#include <stdlib.h>
#include <mach/machine.h>
#include <string.h>
#include <mach/machine/thread_state.h>
#include <inttypes.h>
#include <sys/syslimits.h>
// Given an executable binary with
// "fmain" (a function pointer to main)
// "main"
// symbols, create a fake arm64e corefile that
// contains a memory segment for the fmain
// function pointer, with the value of the
// address of main() with ptrauth bits masked on.
//
// The corefile does not include the "addrable bits"
// LC_NOTE, so lldb will need to fall back on its
// default value from the Darwin arm64 ABI.
int main(int argc, char **argv)
{
if (argc != 3) {
fprintf (stderr, "usage: %s executable-binary output-file\n", argv[0]);
exit(1);
}
FILE *exe = fopen(argv[1], "r");
if (!exe) {
fprintf (stderr, "Unable to open executable %s for reading\n", argv[1]);
exit(1);
}
FILE *out = fopen(argv[2], "w");
if (!out) {
fprintf (stderr, "Unable to open %s for writing\n", argv[2]);
exit(1);
}
char buf[PATH_MAX + 6];
sprintf (buf, "nm '%s'", argv[1]);
FILE *nm = popen(buf, "r");
if (!nm) {
fprintf (stderr, "Unable to run nm on '%s'", argv[1]);
exit (1);
}
uint64_t main_addr = 0;
uint64_t fmain_addr = 0;
while (fgets (buf, sizeof(buf), nm)) {
if (strstr (buf, "_fmain")) {
fmain_addr = strtoul (buf, NULL, 16);
}
if (strstr (buf, "_main")) {
main_addr = strtoul (buf, NULL, 16);
}
}
pclose (nm);
if (main_addr == 0 || fmain_addr == 0) {
fprintf(stderr, "Unable to find address of main or fmain in %s.\n",
argv[1]);
exit (1);
}
// Write out a corefile with contents in this order:
// 1. mach header
// 2. LC_THREAD load command
// 3. LC_SEGMENT_64 load command
// 4. memory segment contents
// struct thread_command {
// uint32_t cmd;
// uint32_t cmdsize;
// uint32_t flavor
// uint32_t count
// struct XXX_thread_state state
int size_of_thread_cmd = 4 + 4 + 4 + 4 + sizeof (arm_thread_state64_t);
struct mach_header_64 mh;
mh.magic = 0xfeedfacf;
mh.cputype = CPU_TYPE_ARM64;
mh.cpusubtype = CPU_SUBTYPE_ARM64E;
mh.filetype = MH_CORE;
mh.ncmds = 2; // LC_THREAD, LC_SEGMENT_64
mh.sizeofcmds = size_of_thread_cmd + sizeof(struct segment_command_64);
mh.flags = 0;
mh.reserved = 0;
fwrite(&mh, sizeof (mh), 1, out);
struct segment_command_64 seg;
seg.cmd = LC_SEGMENT_64;
seg.cmdsize = sizeof(seg);
memset (&seg.segname, 0, 16);
seg.vmaddr = fmain_addr;
seg.vmsize = 8;
// Offset to segment contents
seg.fileoff = sizeof (mh) + size_of_thread_cmd + sizeof(seg);
seg.filesize = 8;
seg.maxprot = 3;
seg.initprot = 3;
seg.nsects = 0;
seg.flags = 0;
fwrite (&seg, sizeof (seg), 1, out);
uint32_t cmd = LC_THREAD;
fwrite (&cmd, sizeof (cmd), 1, out);
uint32_t cmdsize = size_of_thread_cmd;
fwrite (&cmdsize, sizeof (cmdsize), 1, out);
uint32_t flavor = ARM_THREAD_STATE64;
fwrite (&flavor, sizeof (flavor), 1, out);
// count is number of uint32_t's of the register context
uint32_t count = sizeof (arm_thread_state64_t) / 4;
fwrite (&count, sizeof (count), 1, out);
arm_thread_state64_t regstate;
memset (&regstate, 0, sizeof (regstate));
fwrite (&regstate, sizeof (regstate), 1, out);
// Or together a random PAC value from a system using 39 bits
// of addressing with the address of main(). lldb will need
// to correctly strip off the high bits to find the address of
// main.
uint64_t segment_contents = 0xe46bff0000000000 | main_addr;
fwrite (&segment_contents, sizeof (segment_contents), 1, out);
fclose (out);
exit (0);
}

View File

@ -0,0 +1,6 @@
int main(); // Forward declaration so fmain can be initialized with main's address.
int (*fmain)() = main; // Global function pointer holding the address of main.
int main () {
return fmain(); // Calls main again through the function pointer.
}