diff --git a/llvm/include/llvm/MC/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler.h index d545fc7e4ed9..410d6d36f799 100644 --- a/llvm/include/llvm/MC/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler.h @@ -57,8 +57,7 @@ public: /// Constructor - Performs initial setup for the disassembler. MCDisassembler(const MCSubtargetInfo &STI) - : GetOpInfo(0), SymbolLookUp(0), DisInfo(0), Ctx(0), STI(STI), - Symbolizer(), CommentStream(0) {} + : STI(STI), Symbolizer(), CommentStream(0) {} virtual ~MCDisassembler(); @@ -84,19 +83,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const = 0; -private: - // - // Hooks for symbolic disassembly via the public 'C' interface. - // - // The function to get the symbolic information for operands. - LLVMOpInfoCallback GetOpInfo; - // The function to lookup a symbol name. - LLVMSymbolLookupCallback SymbolLookUp; - // The pointer to the block of symbolic information for above call back. - void *DisInfo; - // The assembly context for creating symbols and MCExprs in place of - // immediate operands when there is symbolic information. - MCContext *Ctx; protected: // Subtarget information, for instruction decoding predicates if required. @@ -116,20 +102,6 @@ public: /// This takes ownership of \p Symzer, and deletes the previously set one. void setSymbolizer(std::unique_ptr Symzer); - /// Sets up an external symbolizer that uses the C API callbacks. 
- void setupForSymbolicDisassembly(LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp, - void *DisInfo, - MCContext *Ctx, - std::unique_ptr &RelInfo); - - LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; } - LLVMSymbolLookupCallback getLLVMSymbolLookupCallback() const { - return SymbolLookUp; - } - void *getDisInfoBlock() const { return DisInfo; } - MCContext *getMCContext() const { return Ctx; } - const MCSubtargetInfo& getSubtargetInfo() const { return STI; } // Marked mutable because we cache it inside the disassembler, rather than diff --git a/llvm/include/llvm/MC/MCExternalSymbolizer.h b/llvm/include/llvm/MC/MCExternalSymbolizer.h index cab915234f39..2c7d23707c95 100644 --- a/llvm/include/llvm/MC/MCExternalSymbolizer.h +++ b/llvm/include/llvm/MC/MCExternalSymbolizer.h @@ -26,7 +26,7 @@ namespace llvm { /// /// See llvm-c/Disassembler.h. class MCExternalSymbolizer : public MCSymbolizer { - +protected: /// \name Hooks for symbolic disassembly via the public 'C' interface. /// @{ /// The function to get the symbolic information for operands. 
diff --git a/llvm/lib/MC/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler.cpp index 7a2b1a14a5bb..77d9ce167548 100644 --- a/llvm/lib/MC/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler.cpp @@ -16,20 +16,6 @@ using namespace llvm; MCDisassembler::~MCDisassembler() { } -void MCDisassembler::setupForSymbolicDisassembly( - LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, - void *DisInfo, MCContext *Ctx, std::unique_ptr &RelInfo) { - this->GetOpInfo = GetOpInfo; - this->SymbolLookUp = SymbolLookUp; - this->DisInfo = DisInfo; - this->Ctx = Ctx; - assert(Ctx != 0 && "No MCContext given for symbolic disassembly"); - if (!Symbolizer) - Symbolizer.reset(new MCExternalSymbolizer(*Ctx, std::move(RelInfo), - GetOpInfo, SymbolLookUp, - DisInfo)); -} - bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp index b935b839bd7b..b57b8aaa113e 100644 --- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -82,8 +82,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, std::unique_ptr Symbolizer(TheTarget->createMCSymbolizer( Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release())); DisAsm->setSymbolizer(std::move(Symbolizer)); - DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, - Ctx, RelInfo); + // Set up the instruction printer. 
int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, diff --git a/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp index 8f9b79c90a81..34cc1b630c3b 100644 --- a/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ b/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp @@ -13,19 +13,16 @@ #define DEBUG_TYPE "arm64-disassembler" #include "ARM64Disassembler.h" +#include "ARM64ExternalSymbolizer.h" #include "ARM64Subtarget.h" #include "MCTargetDesc/ARM64AddressingModes.h" #include "Utils/ARM64BaseInfo.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" // Pull DecodeStatus and its enum values into the global namespace. 
typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; @@ -219,205 +216,23 @@ DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, return Success; } -static MCSymbolRefExpr::VariantKind -getVariant(uint64_t LLVMDisassembler_VariantKind) { - switch (LLVMDisassembler_VariantKind) { - case LLVMDisassembler_VariantKind_None: - return MCSymbolRefExpr::VK_None; - case LLVMDisassembler_VariantKind_ARM64_PAGE: - return MCSymbolRefExpr::VK_PAGE; - case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: - return MCSymbolRefExpr::VK_PAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: - return MCSymbolRefExpr::VK_GOTPAGE; - case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: - return MCSymbolRefExpr::VK_GOTPAGEOFF; - case LLVMDisassembler_VariantKind_ARM64_TLVP: - case LLVMDisassembler_VariantKind_ARM64_TLVOFF: - default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; - } -} - -/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic -/// operand in place of the immediate Value in the MCInst. The immediate -/// Value has not had any PC adjustment made by the caller. If the instruction -/// is a branch that adds the PC to the immediate Value then isBranch is -/// Success, else Fail. If the getOpInfo() function was set as part of the -/// setupForSymbolicDisassembly() call then that function is called to get any -/// symbolic information at the Address for this instrution. If that returns -/// non-zero then the symbolic information it returns is used to create an -/// MCExpr and that is added as an operand to the MCInst. If getOpInfo() -/// returns zero and isBranch is Success then a symbol look up for -/// Address + Value is done and if a symbol is found an MCExpr is created with -/// that, else an MCExpr with Address + Value is created. 
If getOpInfo() -/// returns zero and isBranch is Fail then the the Opcode of the MCInst is -/// tested and for ADRP an other instructions that help to load of pointers -/// a symbol look up is done to see it is returns a specific reference type -/// to add to the comment stream. This function returns Success if it adds -/// an operand to the MCInst and Fail otherwise. -bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value, - bool isBranch, - uint64_t InstSize, MCInst &MI, - uint32_t insn) const { - LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback(); - - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = getDisInfoBlock(); - uint64_t ReferenceType; - const char *ReferenceName; - const char *Name; - LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback(); - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = Success; - SymbolicOp.Value = 0; - } else { - SymbolicOp.Value = Address + Value; - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*CommentStream) << "symbol stub for: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADRP) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - (*CommentStream) << format("0x%llx", - 0xfffffffffffff000LL & (Address + Value)); - } else { - return false; - } - } else if (MI.getOpcode() 
== ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) { - if (SymbolLookUp) { - if (MI.getOpcode() == ARM64::ADDXri) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else { - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) - (*CommentStream) << "literal pool symbol address: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*CommentStream) << "literal pool for: \"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) - (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) - (*CommentStream) << "Objc message ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) - (*CommentStream) << "Objc selector ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) - (*CommentStream) << "Objc class ref: " << ReferenceName; - // For these instructions, the SymbolLookUp() above is just to get the - // ReferenceType and ReferenceName. 
We want to make sure not to - // fall through so we don't build an MCExpr to leave the disassembly - // of the immediate values of these instructions to the InstPrinter. - return false; - } else { - return false; - } - } else { - return false; - } - } - - MCContext *Ctx = getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); - if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx); - else - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - MI.addOperand(MCOperand::CreateExpr(Expr)); - - return true; +MCSymbolizer *createARM64ExternalSymbolizer( + StringRef TT, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, MCContext *Ctx, + MCRelocationInfo *RelInfo) { + return new 
llvm::ARM64ExternalSymbolizer( + *Ctx, + std::unique_ptr(RelInfo), + GetOpInfo, SymbolLookUp, DisInfo); } extern "C" void LLVMInitializeARM64Disassembler() { TargetRegistry::RegisterMCDisassembler(TheARM64Target, createARM64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheARM64Target, + createARM64ExternalSymbolizer); } static const unsigned FPR128DecoderTable[] = { @@ -773,8 +588,8 @@ static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, if (ImmVal & (1 << (19 - 1))) ImmVal |= ~((1LL << 19) - 1); - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2, - Inst.getOpcode() != ARM64::LDRXl, 4, Inst)) + if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr, + Inst.getOpcode() != ARM64::LDRXl, 0, 4)) Inst.addOperand(MCOperand::CreateImm(ImmVal)); return Success; } @@ -1023,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn)) + if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4)) Inst.addOperand(MCOperand::CreateImm(offset)); return Success; } @@ -1535,7 +1350,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, imm |= ~((1LL << 21) - 1); DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn)) + if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4)) Inst.addOperand(MCOperand::CreateImm(imm)); return Success; @@ -1571,7 +1386,7 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); } - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn)) + if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4)) Inst.addOperand(MCOperand::CreateImm(ImmVal)); Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal)); return Success; @@ -1588,7 +1403,7 @@ static DecodeStatus 
DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, if (imm & (1 << (26 - 1))) imm |= ~((1LL << 26) - 1); - if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst)) + if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4)) Inst.addOperand(MCOperand::CreateImm(imm)); return Success; @@ -1627,7 +1442,7 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); Inst.addOperand(MCOperand::CreateImm(bit)); - if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst)) + if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4)) Inst.addOperand(MCOperand::CreateImm(dst)); return Success; diff --git a/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.h index 35efc8de42ac..95848d55fa4b 100644 --- a/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ b/llvm/lib/Target/ARM64/Disassembler/ARM64Disassembler.h @@ -33,20 +33,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const; - - /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic - /// operand in place of the immediate Value in the MCInst. The immediate - /// Value has not had any PC adjustment made by the caller. If the instruction - /// adds the PC to the immediate Value then InstsAddsAddressToValue is true, - /// else false. If the getOpInfo() function was set as part of the - /// setupForSymbolicDisassembly() call then that function is called to get any - /// symbolic information at the Address for this instrution. If that returns - /// non-zero then the symbolic information it returns is used to create an - /// MCExpr and that is added as an operand to the MCInst. This function - /// returns true if it adds an operand to the MCInst and false otherwise. 
-  bool tryAddingSymbolicOperand(uint64_t Address, int Value,
-                                bool InstsAddsAddressToValue, uint64_t InstSize,
-                                MCInst &MI, uint32_t insn = 0) const;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp b/llvm/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp
new file mode 100644
index 000000000000..65d4220488c8
--- /dev/null
+++ b/llvm/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp
@@ -0,0 +1,230 @@
+//===- ARM64ExternalSymbolizer.cpp - Symbolizer for ARM64 -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64-disassembler"
+
+#include "ARM64ExternalSymbolizer.h"
+#include "ARM64Subtarget.h"
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "Utils/ARM64BaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static MCSymbolRefExpr::VariantKind
+getVariant(uint64_t LLVMDisassembler_VariantKind) {
+  switch (LLVMDisassembler_VariantKind) {
+  case LLVMDisassembler_VariantKind_None:
+    return MCSymbolRefExpr::VK_None;
+  case LLVMDisassembler_VariantKind_ARM64_PAGE:
+    return MCSymbolRefExpr::VK_PAGE;
+  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
+    return MCSymbolRefExpr::VK_PAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
+    return MCSymbolRefExpr::VK_GOTPAGE;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
+    return MCSymbolRefExpr::VK_GOTPAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_TLVP:
+  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+  default:
+    assert(0 && "bad LLVMDisassembler_VariantKind");
+    return MCSymbolRefExpr::VK_None;
+  }
+}
+
+/// tryAddingSymbolicOperand - Try to add a symbolic operand in place of the
+/// immediate Value in the MCInst. The immediate Value has not had any PC
+/// adjustment made by the caller. IsBranch is true if the instruction is a
+/// branch that adds the PC to the immediate Value, and false otherwise. If
+/// GetOpInfo is non-null, then it is called to get any symbolic information
+/// at the Address for this instruction. If that returns non-zero then the
+/// symbolic information it returns is used to create an MCExpr and that is
+/// added as an operand to the MCInst. If GetOpInfo is null or returns zero
+/// and SymbolLookUp is null, nothing is added. Otherwise, if IsBranch is
+/// true, a symbol look up for Address + Value is done and if a symbol is
+/// found an MCExpr is created with that, else an MCExpr with Address + Value
+/// is created. If IsBranch is false then the Opcode of the MCInst is tested
+/// and, for ADRP and other instructions that help to load pointers, a symbol
+/// look up is done to see if it returns a specific reference type to add to
+/// the comment stream. This function returns true if it adds an operand to
+/// the MCInst and false otherwise.
+bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand(
+    MCInst &MI,
+    raw_ostream &CommentStream,
+    int64_t Value,
+    uint64_t Address,
+    bool IsBranch,
+    uint64_t Offset,
+    uint64_t InstSize) {
+  // FIXME: This method shares a lot of code with
+  // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
+  // refactor the MCExternalSymbolizer interface to allow more of this
+  // implementation to be shared.
+  //
+  struct LLVMOpInfo1 SymbolicOp;
+  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+  SymbolicOp.Value = Value;
+  uint64_t ReferenceType;
+  const char *ReferenceName;
+  if (!GetOpInfo ||
+      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+    // No symbolic operand info was provided. Every path below calls the
+    // SymbolLookUp callback, which clients may leave null, so bail out first.
+    if (!SymbolLookUp)
+      return false;
+    if (IsBranch) {
+      ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+      const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
+                                      Address, &ReferenceName);
+      if (Name) {
+        SymbolicOp.AddSymbol.Name = Name;
+        SymbolicOp.AddSymbol.Present = true;
+        SymbolicOp.Value = 0;
+      } else {
+        SymbolicOp.Value = Address + Value;
+      }
+      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+        CommentStream << "symbol stub for: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Message)
+        CommentStream << "Objc message: " << ReferenceName;
+    } else if (MI.getOpcode() == ARM64::ADRP) {
+      ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
+      // otool expects the fully encoded ADRP instruction to be passed in as
+      // the value here, so reconstruct it:
+      const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
+      uint32_t EncodedInst = 0x90000000;
+      EncodedInst |= (Value & 0x3) << 29; // immlo
+      EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
+      EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
+      SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
+                   &ReferenceName);
+      CommentStream << format("0x%llx",
+                              0xfffffffffffff000LL & (Address + Value));
+    } else if (MI.getOpcode() == ARM64::ADDXri ||
+               MI.getOpcode() == ARM64::LDRXui ||
+               MI.getOpcode() == ARM64::LDRXl ||
+               MI.getOpcode() == ARM64::ADR) {
+      if (MI.getOpcode() == ARM64::ADDXri)
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
+      else if (MI.getOpcode() == ARM64::LDRXui)
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
+      if (MI.getOpcode() == ARM64::LDRXl) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
+        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                     &ReferenceName);
+      } else if (MI.getOpcode() == ARM64::ADR) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
+        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                     &ReferenceName);
+      } else {
+        const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
+        // otool expects the fully encoded ADD/LDR instruction to be passed in
+        // as the value here, so reconstruct it:
+        unsigned EncodedInst =
+          MI.getOpcode() == ARM64::ADDXri ? 0x91000000: 0xF9400000;
+        EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
+        EncodedInst |=
+          MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
+        EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
+
+        SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
+                     &ReferenceName);
+      }
+      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
+        CommentStream << "literal pool symbol address: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+        CommentStream << "literal pool for: \"" << ReferenceName << "\"";
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
+        CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Message)
+        CommentStream << "Objc message: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
+        CommentStream << "Objc message ref: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
+        CommentStream << "Objc selector ref: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
+        CommentStream << "Objc class ref: " << ReferenceName;
+      // For these instructions, the SymbolLookUp() above is just to get the
+      // ReferenceType and ReferenceName. We want to make sure not to
+      // fall through so we don't build an MCExpr to leave the disassembly
+      // of the immediate values of these instructions to the InstPrinter.
+      return false;
+    } else {
+      return false;
+    }
+  }
+
+  const MCExpr *Add = NULL;
+  if (SymbolicOp.AddSymbol.Present) {
+    if (SymbolicOp.AddSymbol.Name) {
+      StringRef Name(SymbolicOp.AddSymbol.Name);
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+      MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
+      if (Variant != MCSymbolRefExpr::VK_None)
+        Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
+      else
+        Add = MCSymbolRefExpr::Create(Sym, Ctx);
+    } else {
+      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx);
+    }
+  }
+
+  const MCExpr *Sub = NULL;
+  if (SymbolicOp.SubtractSymbol.Present) {
+    if (SymbolicOp.SubtractSymbol.Name) {
+      StringRef Name(SymbolicOp.SubtractSymbol.Name);
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+      Sub = MCSymbolRefExpr::Create(Sym, Ctx);
+    } else {
+      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
+    }
+  }
+
+  const MCExpr *Off = NULL;
+  if (SymbolicOp.Value != 0)
+    Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
+
+  const MCExpr *Expr;
+  if (Sub) {
+    const MCExpr *LHS;
+    if (Add)
+      LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
+    else
+      LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
+    else
+      Expr = LHS;
+  } else if (Add) {
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
+    else
+      Expr = Add;
+  } else {
+    if (Off != 0)
+      Expr = Off;
+    else
+      Expr = MCConstantExpr::Create(0, Ctx);
+  }
+
+  MI.addOperand(MCOperand::CreateExpr(Expr));
+
+  return true;
+}
diff --git a/llvm/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h b/llvm/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h
new file mode 100644
index 000000000000..45f07a5e2587
--- /dev/null
+++ b/llvm/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h
@@ -0,0 +1,37
@@ +//===- ARM64ExternalSymbolizer.h - Symbolizer for ARM64 ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Symbolize ARM64 assembly code during disassembly using callbacks. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM64EXTERNALSYMBOLIZER_H +#define ARM64EXTERNALSYMBOLIZER_H + +#include "llvm/MC/MCExternalSymbolizer.h" + +namespace llvm { + +class ARM64ExternalSymbolizer : public MCExternalSymbolizer { +public: + ARM64ExternalSymbolizer(MCContext &Ctx, + std::unique_ptr RelInfo, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo) + : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp, + DisInfo) {} + + bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t InstSize) override; +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/ARM64/Disassembler/CMakeLists.txt b/llvm/lib/Target/ARM64/Disassembler/CMakeLists.txt index ad998c28c493..43ade66be144 100644 --- a/llvm/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ b/llvm/lib/Target/ARM64/Disassembler/CMakeLists.txt @@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMARM64Disassembler ARM64Disassembler.cpp + ARM64ExternalSymbolizer.cpp ) # workaround for hanging compilation on MSVC8, 9 and 10 #if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )