Symbolicate aarch64 adrp+add pc-relative addr in disass

On aarch64, a two-instruction sequence is used to calculate a
pc-relative address. Add some state to the DisassemblerLLVMC
symbolicator so it can track the necessary data across the
two instructions and compute the address being calculated.

Differential Revision: https://reviews.llvm.org/D107213
rdar://49119253
This commit is contained in:
Jason Molenda 2021-08-12 14:29:30 -07:00
parent 8930af45c3
commit 7150b56208
6 changed files with 1009 additions and 1 deletions

View File

@ -1030,7 +1030,8 @@ bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
const char *flavor_string)
: Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
m_data_from_file(false) {
m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
m_adrp_insn() {
if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
m_flavor.assign("default");
}
@ -1310,6 +1311,46 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
Address value_so_addr;
Address pc_so_addr;
if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
m_adrp_address = pc;
m_adrp_insn = value;
*name = nullptr;
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
return nullptr;
}
// If this instruction is an ADD and
// the previous instruction was an ADRP and
// the ADRP's register and this ADD's register are the same,
// then this is a pc-relative address calculation.
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
m_adrp_insn.hasValue() && m_adrp_address == pc - 4 &&
(m_adrp_insn.getValue() & 0x1f) == ((value >> 5) & 0x1f)) {
uint32_t addxri_inst;
uint64_t adrp_imm, addxri_imm;
// Get immlo and immhi bits, OR them together to get the ADRP imm
// value.
adrp_imm = ((m_adrp_insn.getValue() & 0x00ffffe0) >> 3) |
((m_adrp_insn.getValue() >> 29) & 0x3);
// if high bit of immhi after right-shifting set, sign extend
if (adrp_imm & (1ULL << 20))
adrp_imm |= ~((1ULL << 21) - 1);
addxri_inst = value;
addxri_imm = (addxri_inst >> 10) & 0xfff;
// check if 'sh' bit is set, shift imm value up if so
// (this would make no sense, ADRP already gave us this part)
if ((addxri_inst >> (12 + 5 + 5)) & 1)
addxri_imm <<= 12;
value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
addxri_imm;
}
m_adrp_address = LLDB_INVALID_ADDRESS;
m_adrp_insn.reset();
}
if (m_inst->UsingFileAddress()) {
ModuleSP module_sp(m_inst->GetAddress().GetModule());
if (module_sp) {
@ -1371,6 +1412,12 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
}
}
// TODO: llvm-objdump sets the type_ptr to the
// LLVMDisassembler_ReferenceType_Out_* values
// based on where value_so_addr is pointing, with
// Mach-O specific augmentations in MachODump.cpp. e.g.
// see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
// handles.
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
*name = nullptr;
return nullptr;

View File

@ -16,6 +16,7 @@
#include "lldb/Core/Address.h"
#include "lldb/Core/Disassembler.h"
#include "lldb/Core/PluginManager.h"
#include "llvm/ADT/Optional.h"
class InstructionLLVMC;
@ -73,6 +74,12 @@ protected:
InstructionLLVMC *m_inst;
std::mutex m_mutex;
bool m_data_from_file;
// Save the AArch64 ADRP instruction word and address it was at,
// in case the next instruction is an ADD to the same register;
// this is a pc-relative address calculation and we need both
// parts to calculate the symbolication.
lldb::addr_t m_adrp_address;
llvm::Optional<uint32_t> m_adrp_insn;
// Since we need to make two actual MC Disassemblers for ARM (ARM & THUMB),
// and there's a bit of goo to set up and own in the MC disassembler world,

View File

@ -0,0 +1,68 @@
"""
Test that the arm64 ADRP + ADD pc-relative addressing pair is symbolicated.
"""
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
from lldbsuite.test import lldbutil
class TestAArch64AdrpAdd(TestBase):
    """Check that lldb symbolicates the arm64 ADRP + ADD pc-relative pair.

    Each test converts a checked-in YAML file into a binary, disassembles
    that binary's main(), and expects the instruction comments to mention
    both the "HI" c-string and the function foo.
    """

    mydir = TestBase.compute_mydir(__file__)

    @no_debug_info_test
    def test_arm64(self):
        """Run the check against the binary built from a.out-arm64.yaml."""
        self._check_main_in_binary("a.out-arm64")

    @no_debug_info_test
    def test_arm64_32(self):
        """Run the check against the binary built from a.out-arm64_32.yaml."""
        self._check_main_in_binary("a.out-arm64_32")

    def _check_main_in_binary(self, binaryname):
        """Build `binaryname` from its YAML file and check every main() in it.

        The YAML file is expected next to this test as `<binaryname>.yaml`.
        Fails if no "main" symbol belonging to the binary is found, so the
        test cannot pass without having disassembled anything.
        """
        yaml_path = os.path.join(self.getSourceDir(), binaryname + ".yaml")
        obj_path = self.getBuildArtifact(binaryname)
        self.yaml2obj(yaml_path, obj_path)

        target = self.dbg.CreateTarget(obj_path)
        self.assertTrue(target, VALID_TARGET)

        checked_any_main = False
        for f in target.FindFunctions("main").symbols:
            # Only look at symbols that come from the binary under test.
            module_name = f.GetStartAddress().GetModule().GetFileSpec().GetFilename()
            if module_name == binaryname:
                self.disassemble_check_for_hi_and_foo(target, f, binaryname)
                checked_any_main = True

        self.assertTrue(
            checked_any_main,
            'No "main" symbol found in %s; nothing was disassembled' % binaryname)

    def disassemble_check_for_hi_and_foo(self, target, func, binaryname):
        """Assert that the disassembly comments of `func` mention "HI" and "foo".

        target     -- target used to fetch instructions and their comments
        func       -- symbol whose instructions are inspected
        binaryname -- file name of the binary, used in diagnostics only
        """
        insns = func.GetInstructions(target)

        # The binary has ADRP + ADD instruction pairs which load the
        # pc-relative address of a c-string and the address of a function
        # (stored into a function pointer).  lldb should show that c-string
        # and the name of that function in the disassembly comment field.
        comments = [i.GetComment(target) for i in insns]
        found_hi_string = any("HI" in comment for comment in comments)
        found_foo = any("foo" in comment for comment in comments)

        if not (found_hi_string and found_foo):
            print('Did not find "HI" string or "foo" in disassembly symbolication in %s' % binaryname)
            if self.TraceOn():
                strm = lldb.SBStream()
                insns.GetDescription(strm)
                print('Disassembly of main(), looking for "HI" and "foo" in comments:')
                print(strm.GetData())

        self.assertTrue(
            found_hi_string,
            '"HI" string not found in disassembly comments of %s' % binaryname)
        self.assertTrue(
            found_foo,
            '"foo" not found in disassembly comments of %s' % binaryname)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,110 @@
#include <stdio.h>
// For the test case, we really want the layout of this binary
// to be:
//
// foo()
// bar() - 4096 bytes of nop's
// main()
// "HI" string
//
// In reality, getting this layout from the compiler and linker
// is a crapshoot, so YAML files of the correct layout are
// checked in. Recompiling from source may not get the needed
// binary layout.
// bar() is defined after foo(); declare it up front so foo() can call it.
static int bar();

// Returns the value produced by bar() with five added to it.
static int foo() {
  return bar() + 5;
}
// A function containing 4096 bytes of nops, so that when main()
// loads the address of foo(), which is placed before this function,
// it has to subtract a 4096-byte page.
#define SIXTY_FOUR_BYTES_NOP \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop"); \
asm("nop");
static int bar() {
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
SIXTY_FOUR_BYTES_NOP;
return 5;
}
// Entry point of the test binary: stores the address of foo in a function
// pointer, prints "HI" with puts(), and returns the result of calling foo
// through that pointer.
// NOTE(review): the accompanying lldb test looks for "HI" and "foo" in the
// disassembly comments of main(), so both references presumably need to
// stay in this function -- confirm before restructuring.
int main() {
// Take foo's address instead of calling it directly.
int (*f)(void) = foo;
// Reference the "HI" string literal.
puts("HI");
// Indirect call through the pointer; its result is the exit status.
return f();
}