forked from OSchip/llvm-project
Symbolicate aarch64 adrp+add pc-relative addr in disass
On aarch64, a two-instruction sequence (ADRP + ADD) is used to calculate a pc-relative address. Add some state to the DisassemblerLLVMC symbolicator so it can track the necessary data across the two instructions and compute the address being calculated. Differential Revision: https://reviews.llvm.org/D107213 rdar://49119253
This commit is contained in:
parent
8930af45c3
commit
7150b56208
|
@ -1030,7 +1030,8 @@ bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
|
|||
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
|
||||
const char *flavor_string)
|
||||
: Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
|
||||
m_data_from_file(false) {
|
||||
m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
|
||||
m_adrp_insn() {
|
||||
if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
|
||||
m_flavor.assign("default");
|
||||
}
|
||||
|
@ -1310,6 +1311,46 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
|
|||
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
|
||||
Address value_so_addr;
|
||||
Address pc_so_addr;
|
||||
if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
|
||||
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
|
||||
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
|
||||
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
|
||||
m_adrp_address = pc;
|
||||
m_adrp_insn = value;
|
||||
*name = nullptr;
|
||||
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
return nullptr;
|
||||
}
|
||||
// If this instruction is an ADD and
|
||||
// the previous instruction was an ADRP and
|
||||
// the ADRP's register and this ADD's register are the same,
|
||||
// then this is a pc-relative address calculation.
|
||||
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
|
||||
m_adrp_insn.hasValue() && m_adrp_address == pc - 4 &&
|
||||
(m_adrp_insn.getValue() & 0x1f) == ((value >> 5) & 0x1f)) {
|
||||
uint32_t addxri_inst;
|
||||
uint64_t adrp_imm, addxri_imm;
|
||||
// Get immlo and immhi bits, OR them together to get the ADRP imm
|
||||
// value.
|
||||
adrp_imm = ((m_adrp_insn.getValue() & 0x00ffffe0) >> 3) |
|
||||
((m_adrp_insn.getValue() >> 29) & 0x3);
|
||||
// if high bit of immhi after right-shifting set, sign extend
|
||||
if (adrp_imm & (1ULL << 20))
|
||||
adrp_imm |= ~((1ULL << 21) - 1);
|
||||
|
||||
addxri_inst = value;
|
||||
addxri_imm = (addxri_inst >> 10) & 0xfff;
|
||||
// check if 'sh' bit is set, shift imm value up if so
|
||||
// (this would make no sense, ADRP already gave us this part)
|
||||
if ((addxri_inst >> (12 + 5 + 5)) & 1)
|
||||
addxri_imm <<= 12;
|
||||
value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
|
||||
addxri_imm;
|
||||
}
|
||||
m_adrp_address = LLDB_INVALID_ADDRESS;
|
||||
m_adrp_insn.reset();
|
||||
}
|
||||
|
||||
if (m_inst->UsingFileAddress()) {
|
||||
ModuleSP module_sp(m_inst->GetAddress().GetModule());
|
||||
if (module_sp) {
|
||||
|
@ -1371,6 +1412,12 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: llvm-objdump sets the type_ptr to the
|
||||
// LLVMDisassembler_ReferenceType_Out_* values
|
||||
// based on where value_so_addr is pointing, with
|
||||
// Mach-O specific augmentations in MachODump.cpp. e.g.
|
||||
// see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
|
||||
// handles.
|
||||
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
|
||||
*name = nullptr;
|
||||
return nullptr;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "lldb/Core/Address.h"
|
||||
#include "lldb/Core/Disassembler.h"
|
||||
#include "lldb/Core/PluginManager.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
|
||||
class InstructionLLVMC;
|
||||
|
||||
|
@ -73,6 +74,12 @@ protected:
|
|||
InstructionLLVMC *m_inst;
|
||||
std::mutex m_mutex;
|
||||
bool m_data_from_file;
|
||||
// Save the AArch64 ADRP instruction word and address it was at,
|
||||
// in case the next instruction is an ADD to the same register;
|
||||
// this is a pc-relative address calculation and we need both
|
||||
// parts to calculate the symbolication.
|
||||
lldb::addr_t m_adrp_address;
|
||||
llvm::Optional<uint32_t> m_adrp_insn;
|
||||
|
||||
// Since we need to make two actual MC Disassemblers for ARM (ARM & THUMB),
|
||||
// and there's a bit of goo to set up and own in the MC disassembler world,
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
"""
|
||||
Test that the arm64 ADRP + ADD pc-relative addressing pair is symbolicated.
|
||||
"""
|
||||
|
||||
from lldbsuite.test.decorators import *
|
||||
from lldbsuite.test.lldbtest import *
|
||||
from lldbsuite.test import lldbutil
|
||||
|
||||
class TestAArch64AdrpAdd(TestBase):
    """Check that lldb symbolicates the arm64 ADRP + ADD pc-relative pair.

    Each test builds a binary from a checked-in YAML file, disassembles its
    main() and expects the instruction comments to mention both the "HI"
    c-string and the function foo.
    """

    mydir = TestBase.compute_mydir(__file__)

    @no_debug_info_test
    def test_arm64(self):
        """Run the symbolication check against the arm64 binary."""
        self._check_binary("a.out-arm64")

    @no_debug_info_test
    def test_arm64_32(self):
        """Run the symbolication check against the arm64_32 binary."""
        self._check_binary("a.out-arm64_32")

    def _check_binary(self, binaryname):
        """Build `binaryname` from its YAML file and check main()'s disassembly.

        The YAML input is expected next to this test as `<binaryname>.yaml`.
        Fails if no `main` symbol belonging to the built binary is found, so
        the test cannot pass without having inspected any disassembly.
        """
        yaml_path = os.path.join(self.getSourceDir(), binaryname + ".yaml")
        obj_path = self.getBuildArtifact(binaryname)
        self.yaml2obj(yaml_path, obj_path)

        target = self.dbg.CreateTarget(obj_path)
        self.assertTrue(target, VALID_TARGET)

        checked = 0
        for sym in target.FindFunctions("main").symbols:
            module_name = (
                sym.GetStartAddress().GetModule().GetFileSpec().GetFilename()
            )
            # Only look at main() from the binary under test.
            if module_name == binaryname:
                self.disassemble_check_for_hi_and_foo(target, sym, binaryname)
                checked += 1

        self.assertGreater(
            checked, 0, "no main() symbol found in %s" % binaryname
        )

    def disassemble_check_for_hi_and_foo(self, target, func, binaryname):
        """Assert that the disassembly comments of `func` mention "HI" and "foo".

        target:     the target used to resolve instructions and comments.
        func:       the symbol whose instructions are disassembled.
        binaryname: file name of the binary, used only in diagnostics.
        """
        insns = func.GetInstructions(target)
        found_hi_string = False
        found_foo = False

        # The binary has ADRP + ADD instruction pairs which load the
        # pc-relative address of a c-string and load the address of a
        # function into a function pointer.  lldb should show that
        # c-string and the name of that function in the disassembly
        # comment field.
        for insn in insns:
            # Treat a missing comment as empty so the substring tests below
            # cannot raise on None.
            comment = insn.GetComment(target) or ""
            if "HI" in comment:
                found_hi_string = True
            if "foo" in comment:
                found_foo = True

        if not (found_hi_string and found_foo):
            print('Did not find "HI" string or "foo" in disassembly symbolication in %s' % binaryname)
            # NOTE(review): the indentation of this trace block was lost in
            # the reviewed copy; it is assumed to belong to the failure path.
            if self.TraceOn():
                strm = lldb.SBStream()
                insns.GetDescription(strm)
                print('Disassembly of main(), looking for "HI" and "foo" in comments:')
                print(strm.GetData())

        self.assertTrue(
            found_hi_string,
            '"HI" not found in disassembly comments of %s' % binaryname,
        )
        self.assertTrue(
            found_foo,
            '"foo" not found in disassembly comments of %s' % binaryname,
        )
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,110 @@
|
|||
#include <stdio.h>
|
||||
|
||||
// For the test case, we really want the layout of this binary
|
||||
// to be:
|
||||
//
|
||||
// foo()
|
||||
// bar() - 4096 bytes of nop's
|
||||
// main()
|
||||
// "HI" string
|
||||
//
|
||||
// in reality getting this layout from the compiler and linker
|
||||
// is a crapshoot, so I have yaml's checked in of the correct
|
||||
// layout. Recompiling from source may not get the needed
|
||||
// binary layout.
|
||||
|
||||
static int bar();
|
||||
// Returns 5 plus whatever bar() returns.  main() stores this function's
// address in a function pointer, which is the address load the test expects
// to see symbolicated as "foo".
static int foo() {
  const int bar_result = bar();
  return bar_result + 5;
}
|
||||
// A function of 4096 bytes, so when main() loads the
|
||||
// address of foo() before this one, it has to subtract
|
||||
// a 4096 page.
|
||||
// Four NOP statements; building brick for SIXTY_FOUR_BYTES_NOP below.
#define SIXTEEN_BYTES_NOP                                                      \
  asm("nop");                                                                  \
  asm("nop");                                                                  \
  asm("nop");                                                                  \
  asm("nop");

// Expands to 16 `asm("nop");` statements, i.e. 64 bytes of code when each
// instruction is 4 bytes wide, as on AArch64.
#define SIXTY_FOUR_BYTES_NOP                                                   \
  SIXTEEN_BYTES_NOP                                                            \
  SIXTEEN_BYTES_NOP                                                            \
  SIXTEEN_BYTES_NOP                                                            \
  SIXTEEN_BYTES_NOP
|
||||
|
||||
static int bar() {
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
SIXTY_FOUR_BYTES_NOP;
|
||||
return 5;
|
||||
}
|
||||
int main() {
|
||||
int (*f)(void) = foo;
|
||||
puts("HI");
|
||||
return f();
|
||||
}
|
Loading…
Reference in New Issue