Remove redundant symbolization support from MCDisassembler interface.

MCDisassembler has an MCSymbolizer member that is meant to take care of
symbolizing during disassembly, but it also has several methods that enable the
disassembler to do symbolization internally (i.e. without an attached symbolizer
object). There is no need for this duplication, but ARM64 had been making use of
it. This patch moves the ARM64 symbolization logic out of ARM64Disassembler and
into an ARM64ExternalSymbolizer class, and removes the duplicated MCSymbolizer
functionality from the MCDisassembler interface. Symbolization will now be
done exclusively through MCSymbolizers.

There should be no impact on disassembly for any platform, but this allows us to
tidy up the MCDisassembler interface and simplify the process of (and invariants
related to) disassembler setup.

llvm-svn: 206063
This commit is contained in:
Lang Hames 2014-04-11 20:07:58 +00:00
parent 13c310e3ac
commit 95400e22f9
9 changed files with 287 additions and 265 deletions

View File

@ -57,8 +57,7 @@ public:
/// Constructor - Performs initial setup for the disassembler.
MCDisassembler(const MCSubtargetInfo &STI)
: GetOpInfo(0), SymbolLookUp(0), DisInfo(0), Ctx(0), STI(STI),
Symbolizer(), CommentStream(0) {}
: STI(STI), Symbolizer(), CommentStream(0) {}
virtual ~MCDisassembler();
@ -84,19 +83,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const = 0;
private:
//
// Hooks for symbolic disassembly via the public 'C' interface.
//
// The function to get the symbolic information for operands.
LLVMOpInfoCallback GetOpInfo;
// The function to lookup a symbol name.
LLVMSymbolLookupCallback SymbolLookUp;
// The pointer to the block of symbolic information for above call back.
void *DisInfo;
// The assembly context for creating symbols and MCExprs in place of
// immediate operands when there is symbolic information.
MCContext *Ctx;
protected:
// Subtarget information, for instruction decoding predicates if required.
@ -116,20 +102,6 @@ public:
/// This takes ownership of \p Symzer, and deletes the previously set one.
void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
/// Sets up an external symbolizer that uses the C API callbacks.
void setupForSymbolicDisassembly(LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp,
void *DisInfo,
MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &RelInfo);
LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; }
LLVMSymbolLookupCallback getLLVMSymbolLookupCallback() const {
return SymbolLookUp;
}
void *getDisInfoBlock() const { return DisInfo; }
MCContext *getMCContext() const { return Ctx; }
const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
// Marked mutable because we cache it inside the disassembler, rather than

View File

@ -26,7 +26,7 @@ namespace llvm {
///
/// See llvm-c/Disassembler.h.
class MCExternalSymbolizer : public MCSymbolizer {
protected:
/// \name Hooks for symbolic disassembly via the public 'C' interface.
/// @{
/// The function to get the symbolic information for operands.

View File

@ -16,20 +16,6 @@ using namespace llvm;
MCDisassembler::~MCDisassembler() {
}
void MCDisassembler::setupForSymbolicDisassembly(
LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp,
void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &RelInfo) {
this->GetOpInfo = GetOpInfo;
this->SymbolLookUp = SymbolLookUp;
this->DisInfo = DisInfo;
this->Ctx = Ctx;
assert(Ctx != 0 && "No MCContext given for symbolic disassembly");
if (!Symbolizer)
Symbolizer.reset(new MCExternalSymbolizer(*Ctx, std::move(RelInfo),
GetOpInfo, SymbolLookUp,
DisInfo));
}
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
uint64_t Offset,

View File

@ -82,8 +82,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release()));
DisAsm->setSymbolizer(std::move(Symbolizer));
DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo,
Ctx, RelInfo);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,

View File

@ -13,19 +13,16 @@
#define DEBUG_TYPE "arm64-disassembler"
#include "ARM64Disassembler.h"
#include "ARM64ExternalSymbolizer.h"
#include "ARM64Subtarget.h"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "Utils/ARM64BaseInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
// Pull DecodeStatus and its enum values into the global namespace.
typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
@ -219,205 +216,23 @@ DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Success;
}
static MCSymbolRefExpr::VariantKind
getVariant(uint64_t LLVMDisassembler_VariantKind) {
switch (LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_None:
return MCSymbolRefExpr::VK_None;
case LLVMDisassembler_VariantKind_ARM64_PAGE:
return MCSymbolRefExpr::VK_PAGE;
case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
return MCSymbolRefExpr::VK_PAGEOFF;
case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
return MCSymbolRefExpr::VK_GOTPAGE;
case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
return MCSymbolRefExpr::VK_GOTPAGEOFF;
case LLVMDisassembler_VariantKind_ARM64_TLVP:
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
default:
assert(0 && "bad LLVMDisassembler_VariantKind");
return MCSymbolRefExpr::VK_None;
}
}
/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
/// operand in place of the immediate Value in the MCInst. The immediate
/// Value has not had any PC adjustment made by the caller. If the instruction
/// is a branch that adds the PC to the immediate Value then isBranch is
/// Success, else Fail. If the getOpInfo() function was set as part of the
/// setupForSymbolicDisassembly() call then that function is called to get any
/// symbolic information at the Address for this instrution. If that returns
/// non-zero then the symbolic information it returns is used to create an
/// MCExpr and that is added as an operand to the MCInst. If getOpInfo()
/// returns zero and isBranch is Success then a symbol look up for
/// Address + Value is done and if a symbol is found an MCExpr is created with
/// that, else an MCExpr with Address + Value is created. If getOpInfo()
/// returns zero and isBranch is Fail then the the Opcode of the MCInst is
/// tested and for ADRP an other instructions that help to load of pointers
/// a symbol look up is done to see it is returns a specific reference type
/// to add to the comment stream. This function returns Success if it adds
/// an operand to the MCInst and Fail otherwise.
bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value,
bool isBranch,
uint64_t InstSize, MCInst &MI,
uint32_t insn) const {
LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback();
struct LLVMOpInfo1 SymbolicOp;
memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
void *DisInfo = getDisInfoBlock();
uint64_t ReferenceType;
const char *ReferenceName;
const char *Name;
LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback();
if (!getOpInfo ||
!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
if (isBranch) {
if (SymbolLookUp) {
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
&ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = Success;
SymbolicOp.Value = 0;
} else {
SymbolicOp.Value = Address + Value;
}
if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
(*CommentStream) << "symbol stub for: " << ReferenceName;
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_Objc_Message)
(*CommentStream) << "Objc message: " << ReferenceName;
} else {
return false;
}
} else if (MI.getOpcode() == ARM64::ADRP) {
if (SymbolLookUp) {
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
&ReferenceName);
(*CommentStream) << format("0x%llx",
0xfffffffffffff000LL & (Address + Value));
} else {
return false;
}
} else if (MI.getOpcode() == ARM64::ADDXri ||
MI.getOpcode() == ARM64::LDRXui ||
MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) {
if (SymbolLookUp) {
if (MI.getOpcode() == ARM64::ADDXri)
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
else if (MI.getOpcode() == ARM64::LDRXui)
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
if (MI.getOpcode() == ARM64::LDRXl) {
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
&ReferenceName);
} else if (MI.getOpcode() == ARM64::ADR) {
ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
&ReferenceName);
} else {
Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
&ReferenceName);
}
if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
(*CommentStream) << "literal pool symbol address: " << ReferenceName;
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
(*CommentStream) << "literal pool for: \"" << ReferenceName << "\"";
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
(*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\"";
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_Objc_Message)
(*CommentStream) << "Objc message: " << ReferenceName;
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
(*CommentStream) << "Objc message ref: " << ReferenceName;
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
(*CommentStream) << "Objc selector ref: " << ReferenceName;
else if (ReferenceType ==
LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
(*CommentStream) << "Objc class ref: " << ReferenceName;
// For these instructions, the SymbolLookUp() above is just to get the
// ReferenceType and ReferenceName. We want to make sure not to
// fall through so we don't build an MCExpr to leave the disassembly
// of the immediate values of these instructions to the InstPrinter.
return false;
} else {
return false;
}
} else {
return false;
}
}
MCContext *Ctx = getMCContext();
const MCExpr *Add = NULL;
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
if (Variant != MCSymbolRefExpr::VK_None)
Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx);
else
Add = MCSymbolRefExpr::Create(Sym, *Ctx);
} else {
Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
}
}
const MCExpr *Sub = NULL;
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
} else {
Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
}
}
const MCExpr *Off = NULL;
if (SymbolicOp.Value != 0)
Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
const MCExpr *Expr;
if (Sub) {
const MCExpr *LHS;
if (Add)
LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
else
LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
if (Off != 0)
Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
else
Expr = LHS;
} else if (Add) {
if (Off != 0)
Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
else
Expr = Add;
} else {
if (Off != 0)
Expr = Off;
else
Expr = MCConstantExpr::Create(0, *Ctx);
}
MI.addOperand(MCOperand::CreateExpr(Expr));
return true;
/// Factory for the ARM64 symbolizer; registered with the TargetRegistry in
/// LLVMInitializeARM64Disassembler(). The returned symbolizer wraps \p RelInfo
/// in a std::unique_ptr and forwards the C API callbacks and \p DisInfo
/// unchanged. \p TT is accepted for signature compatibility and is not used.
MCSymbolizer *createARM64ExternalSymbolizer(
    StringRef TT,
    LLVMOpInfoCallback GetOpInfo,
    LLVMSymbolLookupCallback SymbolLookUp,
    void *DisInfo, MCContext *Ctx,
    MCRelocationInfo *RelInfo) {
  // Wrap the raw relocation-info pointer first so that the hand-off to the
  // symbolizer is an explicit move.
  std::unique_ptr<MCRelocationInfo> OwnedRelInfo(RelInfo);
  MCContext &Context = *Ctx;
  return new llvm::ARM64ExternalSymbolizer(Context, std::move(OwnedRelInfo),
                                           GetOpInfo, SymbolLookUp, DisInfo);
}
/// C entry point that registers the ARM64 disassembler components with the
/// TargetRegistry for TheARM64Target.
extern "C" void LLVMInitializeARM64Disassembler() {
// Register the factory that creates the ARM64 MCDisassembler.
TargetRegistry::RegisterMCDisassembler(TheARM64Target,
createARM64Disassembler);
// Register the factory that creates the ARM64-specific external symbolizer.
TargetRegistry::RegisterMCSymbolizer(TheARM64Target,
createARM64ExternalSymbolizer);
}
static const unsigned FPR128DecoderTable[] = {
@ -773,8 +588,8 @@ static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm,
if (ImmVal & (1 << (19 - 1)))
ImmVal |= ~((1LL << 19) - 1);
if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2,
Inst.getOpcode() != ARM64::LDRXl, 4, Inst))
if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr,
Inst.getOpcode() != ARM64::LDRXl, 0, 4))
Inst.addOperand(MCOperand::CreateImm(ImmVal));
return Success;
}
@ -1023,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn))
if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
Inst.addOperand(MCOperand::CreateImm(offset));
return Success;
}
@ -1535,7 +1350,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
imm |= ~((1LL << 21) - 1);
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn))
if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
Inst.addOperand(MCOperand::CreateImm(imm));
return Success;
@ -1571,7 +1386,7 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
}
if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn))
if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
Inst.addOperand(MCOperand::CreateImm(ImmVal));
Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
return Success;
@ -1588,7 +1403,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
if (imm & (1 << (26 - 1)))
imm |= ~((1LL << 26) - 1);
if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst))
if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4))
Inst.addOperand(MCOperand::CreateImm(imm));
return Success;
@ -1627,7 +1442,7 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
Inst.addOperand(MCOperand::CreateImm(bit));
if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst))
if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4))
Inst.addOperand(MCOperand::CreateImm(dst));
return Success;

View File

@ -33,20 +33,6 @@ public:
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const;
/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
/// operand in place of the immediate Value in the MCInst. The immediate
/// Value has not had any PC adjustment made by the caller. If the instruction
/// adds the PC to the immediate Value then InstsAddsAddressToValue is true,
/// else false. If the getOpInfo() function was set as part of the
/// setupForSymbolicDisassembly() call then that function is called to get any
/// symbolic information at the Address for this instrution. If that returns
/// non-zero then the symbolic information it returns is used to create an
/// MCExpr and that is added as an operand to the MCInst. This function
/// returns true if it adds an operand to the MCInst and false otherwise.
bool tryAddingSymbolicOperand(uint64_t Address, int Value,
bool InstsAddsAddressToValue, uint64_t InstSize,
MCInst &MI, uint32_t insn = 0) const;
};
} // namespace llvm

View File

@ -0,0 +1,226 @@
//===- ARM64ExternalSymbolizer.cpp - Symbolizer for ARM64 -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm64-disassembler"
#include "ARM64ExternalSymbolizer.h"
#include "ARM64Subtarget.h"
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "Utils/ARM64BaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
/// Translate a C API LLVMDisassembler_VariantKind_* value into the matching
/// MCSymbolRefExpr::VariantKind. Kinds without a mapping here (including
/// LLVMDisassembler_VariantKind_ARM64_TLVP and
/// LLVMDisassembler_VariantKind_ARM64_TLVOFF) trip the assertion and, when
/// assertions are compiled out, yield VK_None.
static MCSymbolRefExpr::VariantKind
getVariant(uint64_t LLVMDisassembler_VariantKind) {
  const uint64_t Kind = LLVMDisassembler_VariantKind;

  if (Kind == LLVMDisassembler_VariantKind_None)
    return MCSymbolRefExpr::VK_None;
  if (Kind == LLVMDisassembler_VariantKind_ARM64_PAGE)
    return MCSymbolRefExpr::VK_PAGE;
  if (Kind == LLVMDisassembler_VariantKind_ARM64_PAGEOFF)
    return MCSymbolRefExpr::VK_PAGEOFF;
  if (Kind == LLVMDisassembler_VariantKind_ARM64_GOTPAGE)
    return MCSymbolRefExpr::VK_GOTPAGE;
  if (Kind == LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF)
    return MCSymbolRefExpr::VK_GOTPAGEOFF;

  // Everything else, the TLVP/TLVOFF kinds included, is rejected.
  assert(0 && "bad LLVMDisassembler_VariantKind");
  return MCSymbolRefExpr::VK_None;
}
/// tryAddingSymbolicOperand - Try to add a symbolic operand to \p MI in place
/// of the immediate \p Value. \p Value has not had any PC adjustment made by
/// the caller. \p IsBranch is true if the instruction is a branch that adds
/// the PC to the immediate, and false otherwise.
///
/// If GetOpInfo is non-null it is called (always with an operand offset of 0;
/// the \p Offset parameter is not consulted) to get any symbolic information
/// at \p Address for this instruction. If it returns non-zero, the symbolic
/// information it filled in is used to create an MCExpr, which is added as an
/// operand to \p MI.
///
/// If GetOpInfo is null or returns zero, SymbolLookUp is used instead; if
/// SymbolLookUp is also null nothing can be symbolized and false is returned.
///  - For a branch, a symbol look up for Address + Value is done. If a symbol
///    is found an MCExpr referring to it is created, otherwise an MCExpr with
///    the constant Address + Value is created.
///  - For ADRP, the look up is done on the re-encoded instruction, the page
///    address is written to \p CommentStream, and an operand expression is
///    still built from the (unsymbolized) operand info.
///  - For ADDXri, LDRXui, LDRXl and ADR, the look up is done only to see
///    whether it returns a specific reference type to describe in
///    \p CommentStream; no operand is added.
///  - For any other instruction nothing is done.
///
/// \returns true if an operand was added to \p MI and false otherwise.
bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand(
    MCInst &MI,
    raw_ostream &CommentStream,
    int64_t Value,
    uint64_t Address,
    bool IsBranch,
    uint64_t Offset,
    uint64_t InstSize) {
  // FIXME: This method shares a lot of code with
  //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
  //        to refactor the MCExternalSymbolizer interface to allow more of
  //        this implementation to be shared.
  //
  struct LLVMOpInfo1 SymbolicOp;
  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
  SymbolicOp.Value = Value;
  uint64_t ReferenceType;
  const char *ReferenceName;

  if (!GetOpInfo ||
      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
    // Every fallback path below goes through the symbol lookup callback.
    // Without one there is nothing to symbolize, so bail out rather than
    // call through a null function pointer.
    if (!SymbolLookUp)
      return false;

    if (IsBranch) {
      ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
      const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
                                      Address, &ReferenceName);
      if (Name) {
        // Found a symbol at the branch target: reference it with no offset.
        SymbolicOp.AddSymbol.Name = Name;
        SymbolicOp.AddSymbol.Present = true;
        SymbolicOp.Value = 0;
      } else {
        // No symbol: fall back to the absolute target address.
        SymbolicOp.Value = Address + Value;
      }
      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
        CommentStream << "symbol stub for: " << ReferenceName;
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_Objc_Message)
        CommentStream << "Objc message: " << ReferenceName;
    } else if (MI.getOpcode() == ARM64::ADRP) {
      ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
      // otool expects the fully encoded ADRP instruction to be passed in as
      // the value here, so reconstruct it:
      const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
      uint32_t EncodedInst = 0x90000000;
      EncodedInst |= (Value & 0x3) << 29; // immlo
      EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
      EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
      SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
                   &ReferenceName);
      // Print Address + Value with the low 12 bits cleared.
      CommentStream << format("0x%llx",
                              0xfffffffffffff000LL & (Address + Value));
      // Note: no return here; control continues to the expression building
      // code below.
    } else if (MI.getOpcode() == ARM64::ADDXri ||
               MI.getOpcode() == ARM64::LDRXui ||
               MI.getOpcode() == ARM64::LDRXl ||
               MI.getOpcode() == ARM64::ADR) {
      if (MI.getOpcode() == ARM64::ADDXri)
        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
      else if (MI.getOpcode() == ARM64::LDRXui)
        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
      if (MI.getOpcode() == ARM64::LDRXl) {
        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
                     &ReferenceName);
      } else if (MI.getOpcode() == ARM64::ADR) {
        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
                     &ReferenceName);
      } else {
        const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
        // otool expects the fully encoded ADD/LDR instruction to be passed in
        // as the value here, so reconstruct it:
        unsigned EncodedInst =
          MI.getOpcode() == ARM64::ADDXri ? 0x91000000: 0xF9400000;
        EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
        EncodedInst |=
          MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
        EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
        SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
                     &ReferenceName);
      }
      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
        CommentStream << "literal pool symbol address: " << ReferenceName;
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
        CommentStream << "literal pool for: \"" << ReferenceName << "\"";
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
        CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_Objc_Message)
        CommentStream << "Objc message: " << ReferenceName;
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
        CommentStream << "Objc message ref: " << ReferenceName;
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
        CommentStream << "Objc selector ref: " << ReferenceName;
      else if (ReferenceType ==
               LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
        CommentStream << "Objc class ref: " << ReferenceName;
      // For these instructions, the SymbolLookUp() above is just to get the
      // ReferenceType and ReferenceName. We want to make sure not to
      // fall through so we don't build an MCExpr to leave the disassembly
      // of the immediate values of these instructions to the InstPrinter.
      return false;
    } else {
      return false;
    }
  }

  // Build the expression Add - Sub + Off from whichever parts are present.
  const MCExpr *Add = NULL;
  if (SymbolicOp.AddSymbol.Present) {
    if (SymbolicOp.AddSymbol.Name) {
      StringRef Name(SymbolicOp.AddSymbol.Name);
      MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
      MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
      if (Variant != MCSymbolRefExpr::VK_None)
        Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
      else
        Add = MCSymbolRefExpr::Create(Sym, Ctx);
    } else {
      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx);
    }
  }

  const MCExpr *Sub = NULL;
  if (SymbolicOp.SubtractSymbol.Present) {
    if (SymbolicOp.SubtractSymbol.Name) {
      StringRef Name(SymbolicOp.SubtractSymbol.Name);
      MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
      Sub = MCSymbolRefExpr::Create(Sym, Ctx);
    } else {
      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
    }
  }

  const MCExpr *Off = NULL;
  if (SymbolicOp.Value != 0)
    Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);

  const MCExpr *Expr;
  if (Sub) {
    const MCExpr *LHS;
    if (Add)
      LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
    else
      LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
    if (Off != 0)
      Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
    else
      Expr = LHS;
  } else if (Add) {
    if (Off != 0)
      Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
    else
      Expr = Add;
  } else {
    // Neither symbol is present: the operand is just the constant offset
    // (or an explicit zero).
    if (Off != 0)
      Expr = Off;
    else
      Expr = MCConstantExpr::Create(0, Ctx);
  }

  MI.addOperand(MCOperand::CreateExpr(Expr));

  return true;
}

View File

@ -0,0 +1,37 @@
//===- ARM64ExternalSymbolizer.h - Symbolizer for ARM64 ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Symbolize ARM64 assembly code during disassembly using callbacks.
//
//===----------------------------------------------------------------------===//
#ifndef ARM64EXTERNALSYMBOLIZER_H
#define ARM64EXTERNALSYMBOLIZER_H
#include "llvm/MC/MCExternalSymbolizer.h"
namespace llvm {
/// ARM64-specific symbolizer that symbolizes operands during disassembly
/// using the C API callbacks held by its MCExternalSymbolizer base class.
class ARM64ExternalSymbolizer : public MCExternalSymbolizer {
public:
/// Forwards every argument unchanged to the MCExternalSymbolizer
/// constructor; \p RelInfo is moved into the base class.
ARM64ExternalSymbolizer(MCContext &Ctx,
std::unique_ptr<MCRelocationInfo> RelInfo,
LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo)
: MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp,
DisInfo) {}
/// ARM64 override of the base-class hook that tries to replace the
/// immediate \p Value of \p MI with a symbolic operand.
/// \returns true if an operand was added to \p MI and false otherwise.
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
int64_t Value, uint64_t Address, bool IsBranch,
uint64_t Offset, uint64_t InstSize) override;
};
} // namespace llvm
#endif

View File

@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMARM64Disassembler
ARM64Disassembler.cpp
ARM64ExternalSymbolizer.cpp
)
# workaround for hanging compilation on MSVC8, 9 and 10
#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )