Nuke MCAnalysis.

The code is buggy and barely tested. It is also mostly boilerplate.
(This includes MCObjectDisassembler, which is the interface to that
functionality)

Following an IRC discussion with Jim Grosbach, it seems sensible to just
nuke the whole lot of functionality, and dig it up from VCS if
necessary (I hope not!).

All of this stuff appears to have been added in a huge patch dump (look
at the timeframe surrounding e.g. r182628) where almost every patch
seemed to be untested and not reviewed before being committed.
Post-review responses to the patches were never addressed. I don't think
any of it would have passed pre-commit review.

I doubt anyone is depending on this, since this code appears to be
extremely buggy. In limited testing that Michael Spencer and I did, we
couldn't find a single real-world object file that wouldn't crash the
CFG reconstruction stuff. The symbolizer stuff has O(n^2) behavior and
so is not much use to anyone anyway. It seemed simpler to remove them as
a whole. Most of this code is boilerplate, which is the only way it was
able to scrape by with 60% coverage.

HEADSUP: Modules folks, some files I nuked were referenced from
include/llvm/module.modulemap; I just deleted the references. Hopefully
that is the right fix (one was a FIXME though!).

llvm-svn: 216983
This commit is contained in:
Sean Silva 2014-09-02 22:32:20 +00:00
parent 79cc1e3ae7
commit 888320e9fa
31 changed files with 5 additions and 2854 deletions

View File

@ -1,199 +0,0 @@
//===-- MCAtom.h ------------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCAtom class, which is used to
// represent a contiguous region in a decoded object that is uniformly data or
// instructions.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCANALYSIS_MCATOM_H
#define LLVM_MC_MCANALYSIS_MCATOM_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/DataTypes.h"
#include <vector>
namespace llvm {
class MCModule;
class MCAtom;
class MCTextAtom;
class MCDataAtom;
/// \brief Represents a contiguous range of either instructions (a TextAtom)
/// or data (a DataAtom). Address ranges are expressed as _closed_ intervals.
///
/// The kind is fixed at construction and reported by getKind(). The bounds are
/// changed through remap() and its helpers, which hand the update to the
/// owning MCModule.
class MCAtom {
/// Out-of-line virtual method; defined in the implementation file rather than
/// inline in this header.
virtual void anchor();
public:
virtual ~MCAtom() {}
/// \brief Discriminator naming the concrete atom type.
enum AtomKind { TextAtom, DataAtom };
/// \brief Get the kind this atom was constructed with.
AtomKind getKind() const { return Kind; }
/// \brief Get the start address of the atom.
uint64_t getBeginAddr() const { return Begin; }
/// \brief Get the end address, i.e. the last one inside the atom.
uint64_t getEndAddr() const { return End; }
/// \name Atom modification methods:
/// When modifying a TextAtom, keep instruction boundaries in mind.
/// For instance, split must be given the start address of an instruction.
/// @{
/// \brief Splits the atom in two at a given address.
/// \param SplitPt Address at which to start a new atom, splitting this one.
/// \returns The newly created atom starting at \p SplitPt.
virtual MCAtom *split(uint64_t SplitPt) = 0;
/// \brief Truncates an atom, discarding everything after \p TruncPt.
/// \param TruncPt Last byte address to be contained in this atom.
virtual void truncate(uint64_t TruncPt) = 0;
/// @}
/// \name Naming:
///
/// This is mostly for display purposes, and may contain anything that hints
/// at what the atom contains: section or symbol name, BB start address, ..
/// @{
/// \brief Get the display name; it is "(unknown)" until setName() is called.
StringRef getName() const { return Name; }
/// \brief Replace the display name with a copy of \p NewName.
void setName(StringRef NewName) { Name = NewName.str(); }
/// @}
protected:
/// The atom type, set once by the constructor.
const AtomKind Kind;
/// Display name; the constructor initializes it to "(unknown)".
std::string Name;
/// The module passed at construction; remap() forwards to it.
MCModule *Parent;
/// First and last address covered by the atom (closed interval).
uint64_t Begin, End;
friend class MCModule;
/// \brief Construct an atom of kind \p K covering [\p B, \p E] in module
/// \p P. Only subclasses and MCModule (a friend) can reach this constructor.
MCAtom(AtomKind K, MCModule *P, uint64_t B, uint64_t E)
: Kind(K), Name("(unknown)"), Parent(P), Begin(B), End(E) { }
/// \name Atom remapping helpers
/// @{
/// \brief Remap the atom, using the given range, updating Begin/End.
/// One or both of the bounds can remain the same, but overlapping with other
/// atoms in the module is still forbidden.
void remap(uint64_t NewBegin, uint64_t NewEnd);
/// \brief Remap the atom to prepare for a truncation at TruncPt.
/// Equivalent to:
/// \code
/// // Bound checks
/// remap(Begin, TruncPt);
/// \endcode
void remapForTruncate(uint64_t TruncPt);
/// \brief Remap the atom to prepare for a split at SplitPt.
/// The bounds for the resulting atoms are returned in {L,R}{Begin,End}.
/// The current atom is truncated to \p LEnd.
void remapForSplit(uint64_t SplitPt,
uint64_t &LBegin, uint64_t &LEnd,
uint64_t &RBegin, uint64_t &REnd);
/// @}
};
/// \name Text atom
/// @{
/// \brief An entry in an MCTextAtom: a disassembled instruction.
/// NOTE: Both the Address and Size field are actually redundant when taken in
/// the context of the text atom, and may better be exposed in an iterator
/// instead of stored in the atom, which would replace this class.
class MCDecodedInst {
public:
/// The decoded instruction (stored by value).
MCInst Inst;
/// Address at which the instruction starts.
uint64_t Address;
/// Size of the instruction, in the same units as addresses; MCTextAtom
/// advances its next-instruction address by this amount when appending.
uint64_t Size;
/// \brief Bundle \p Inst with the \p Address it was decoded at and its
/// \p Size. The instruction is copied.
MCDecodedInst(const MCInst &Inst, uint64_t Address, uint64_t Size)
: Inst(Inst), Address(Address), Size(Size) {}
};
/// \brief An atom consisting of disassembled instructions.
///
/// Instructions are appended with addInst(), each one placed at the address
/// right after the previous one. Instances are created by MCModule: the
/// constructor is private and MCModule is a friend.
class MCTextAtom : public MCAtom {
private:
typedef std::vector<MCDecodedInst> InstListTy;
/// The decoded instructions, in the order they were appended.
InstListTy Insts;
/// \brief The address of the next appended instruction, i.e., the
/// address immediately after the last instruction in the atom.
/// The constructor initializes it to the atom's begin address.
uint64_t NextInstAddress;
public:
/// Append an instruction, expanding the atom if necessary.
/// \param Inst The decoded instruction; a copy is stored.
/// \param Size The instruction's size; the next instruction will be placed
/// this far after the current one.
void addInst(const MCInst &Inst, uint64_t Size);
/// \name Instruction list access
/// @{
typedef InstListTy::const_iterator const_iterator;
const_iterator begin() const { return Insts.begin(); }
const_iterator end() const { return Insts.end(); }
/// Last instruction; forwards to std::vector::back(), so the atom must not
/// be empty.
const MCDecodedInst &back() const { return Insts.back(); }
/// Bounds-checked access; forwards to std::vector::at().
const MCDecodedInst &at(size_t n) const { return Insts.at(n); }
/// Number of instructions currently stored.
size_t size() const { return Insts.size(); }
/// @}
/// \name Atom type specific split/truncate logic.
/// @{
/// Split at \p SplitPt, which must be the start address of an instruction.
/// Returns the new atom holding the instructions from \p SplitPt onwards.
MCTextAtom *split(uint64_t SplitPt) override;
/// Truncate so that \p TruncPt is the last address in the atom; instructions
/// starting after it are discarded.
void truncate(uint64_t TruncPt) override;
/// @}
// Class hierarchy.
/// Kind test for an MCAtom pointer: true when \p A is a TextAtom.
static bool classof(const MCAtom *A) { return A->getKind() == TextAtom; }
private:
friend class MCModule;
// Private constructor - only callable by MCModule
MCTextAtom(MCModule *P, uint64_t Begin, uint64_t End)
: MCAtom(TextAtom, P, Begin, End), NextInstAddress(Begin) {}
};
/// @}
/// \name Data atom
/// @{
/// \brief An entry in an MCDataAtom.
// NOTE: This may change to a more complex type in the future.
typedef uint8_t MCData;
/// \brief An atom consisting of a sequence of bytes.
///
/// Bytes are appended with addData(). Instances are created by MCModule: the
/// constructor is private and MCModule is a friend.
class MCDataAtom : public MCAtom {
/// The data entries, in the order they were appended.
std::vector<MCData> Data;
public:
/// Append a data entry, expanding the atom if necessary.
void addData(const MCData &D);
/// Get a reference to the data in this atom.
/// The returned ArrayRef is a view of the atom's own storage, not a copy.
ArrayRef<MCData> getData() const { return Data; }
/// \name Atom type specific split/truncate logic.
/// @{
/// Split at \p SplitPt; returns the new atom that starts at \p SplitPt and
/// receives the data entries from that point onwards.
MCDataAtom *split(uint64_t SplitPt) override;
/// Truncate so that \p TruncPt is the last address in the atom.
void truncate(uint64_t TruncPt) override;
/// @}
// Class hierarchy.
/// Kind test for an MCAtom pointer: true when \p A is a DataAtom.
static bool classof(const MCAtom *A) { return A->getKind() == DataAtom; }
private:
friend class MCModule;
// Private constructor - only callable by MCModule
MCDataAtom(MCModule *P, uint64_t Begin, uint64_t End)
: MCAtom(DataAtom, P, Begin, End) {
// Pre-allocate room for one entry per address in the closed range
// [Begin, End]; no entries are actually created here.
Data.reserve(End + 1 - Begin);
}
};
}
#endif

View File

@ -1,142 +0,0 @@
//===-- MCFunction.h --------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the data structures to hold a CFG reconstructed from
// machine code.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCANALYSIS_MCFUNCTION_H
#define LLVM_MC_MCANALYSIS_MCFUNCTION_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInst.h"
#include <memory>
#include <string>
#include <vector>
namespace llvm {
class MCFunction;
class MCModule;
class MCTextAtom;
/// \brief Basic block containing a sequence of disassembled instructions.
/// The basic block is backed by an MCTextAtom, which holds the instructions,
/// and the address range it covers.
/// Create a basic block using MCFunction::createBlock.
class MCBasicBlock {
/// The backing text atom (not owned by the block).
const MCTextAtom *Insts;
// MCFunction owns the basic block.
MCFunction *Parent;
friend class MCFunction;
/// Private: blocks are only constructed by MCFunction (a friend).
MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent);
/// \name Predecessors/Successors, to represent the CFG.
/// @{
typedef std::vector<const MCBasicBlock *> BasicBlockListTy;
BasicBlockListTy Successors;
BasicBlockListTy Predecessors;
/// @}
public:
/// \brief Get the backing MCTextAtom, containing the instruction sequence.
const MCTextAtom *getInsts() const { return Insts; }
/// \name Get the owning MCFunction.
/// @{
const MCFunction *getParent() const { return Parent; }
MCFunction *getParent() { return Parent; }
/// @}
/// MC CFG access: Predecessors/Successors.
/// @{
typedef BasicBlockListTy::const_iterator succ_const_iterator;
succ_const_iterator succ_begin() const { return Successors.begin(); }
succ_const_iterator succ_end() const { return Successors.end(); }
typedef BasicBlockListTy::const_iterator pred_const_iterator;
pred_const_iterator pred_begin() const { return Predecessors.begin(); }
pred_const_iterator pred_end() const { return Predecessors.end(); }
/// Record \p MCBB as a successor; a block already present is not added
/// again. Only this block's list is touched: the matching predecessor edge
/// on \p MCBB must be added separately.
void addSuccessor(const MCBasicBlock *MCBB);
/// \returns true if \p MCBB is in this block's successor list.
bool isSuccessor(const MCBasicBlock *MCBB) const;
/// Record \p MCBB as a predecessor; a block already present is not added
/// again. Only this block's list is touched.
void addPredecessor(const MCBasicBlock *MCBB);
/// \returns true if \p MCBB is in this block's predecessor list.
bool isPredecessor(const MCBasicBlock *MCBB) const;
/// \brief Split block, mirroring NewAtom = Insts->split(..).
/// This moves all successors to \p SplitBB, and
/// adds a fallthrough to it.
/// \p SplitBB The result of splitting Insts, a basic block directly following
/// this basic block.
void splitBasicBlock(MCBasicBlock *SplitBB);
/// @}
};
/// \brief Represents a function in machine code, containing MCBasicBlocks.
/// MCFunctions are created by MCModule.
class MCFunction {
// Non-copyable: the function uniquely owns its basic blocks.
MCFunction (const MCFunction&) LLVM_DELETED_FUNCTION;
MCFunction& operator=(const MCFunction&) LLVM_DELETED_FUNCTION;
/// The function's name, copied at construction.
std::string Name;
/// The module passed at construction.
MCModule *ParentModule;
typedef std::vector<std::unique_ptr<MCBasicBlock>> BasicBlockListTy;
/// Owned basic blocks, in creation order.
BasicBlockListTy Blocks;
// MCModule owns the function.
friend class MCModule;
/// Private: functions are only constructed by MCModule (a friend).
MCFunction(StringRef Name, MCModule *Parent);
public:
/// \brief Create an MCBasicBlock backed by Insts and add it to this function.
/// \param Insts Sequence of straight-line code backing the basic block.
/// \returns The newly created basic block.
MCBasicBlock &createBlock(const MCTextAtom &Insts);
StringRef getName() const { return Name; }
/// \name Get the owning MC Module.
/// @{
const MCModule *getParent() const { return ParentModule; }
MCModule *getParent() { return ParentModule; }
/// @}
/// \name Access to the function's basic blocks. No ordering is enforced,
/// except that the first block is the entry block.
/// @{
/// \brief Get the entry point basic block.
/// This forwards to front(), so the function must not be empty; check
/// empty() first.
const MCBasicBlock *getEntryBlock() const { return front(); }
MCBasicBlock *getEntryBlock() { return front(); }
bool empty() const { return Blocks.empty(); }
// Iterators dereference to std::unique_ptr<MCBasicBlock>, not to the block
// itself.
typedef BasicBlockListTy::const_iterator const_iterator;
typedef BasicBlockListTy:: iterator iterator;
const_iterator begin() const { return Blocks.begin(); }
iterator begin() { return Blocks.begin(); }
const_iterator end() const { return Blocks.end(); }
iterator end() { return Blocks.end(); }
// front()/back() forward to std::vector::front()/back(); calling them on a
// function with no blocks is undefined.
const MCBasicBlock* front() const { return Blocks.front().get(); }
MCBasicBlock* front() { return Blocks.front().get(); }
const MCBasicBlock* back() const { return Blocks.back().get(); }
MCBasicBlock* back() { return Blocks.back().get(); }
/// \brief Find the basic block, if any, that starts at \p StartAddr.
/// \returns The block, or null if no block's atom begins at \p StartAddr.
const MCBasicBlock *find(uint64_t StartAddr) const;
MCBasicBlock *find(uint64_t StartAddr);
/// @}
};
}
#endif

View File

@ -1,134 +0,0 @@
//===-- MCModule.h - MCModule class -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCModule class, which is used to
// represent a complete, disassembled object file or executable.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCANALYSIS_MCMODULE_H
#define LLVM_MC_MCANALYSIS_MCMODULE_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include <memory>
#include <vector>
namespace llvm {
class MCAtom;
class MCBasicBlock;
class MCDataAtom;
class MCFunction;
class MCObjectDisassembler;
class MCTextAtom;
/// \brief A completely disassembled object file or executable.
/// It comprises a list of MCAtom's, each representing a contiguous range of
/// either instructions or data.
/// An MCModule is created using MCObjectDisassembler::buildModule.
class MCModule {
/// \name Atom tracking
/// @{
/// \brief Atoms in this module, sorted by begin address.
/// FIXME: This doesn't handle overlapping atoms (which happen when a basic
/// block starts in the middle of an instruction of another basic block.)
typedef std::vector<MCAtom*> AtomListTy;
// Raw pointers: NOTE(review): presumably released in ~MCModule(), whose
// definition is not visible in this header - confirm.
AtomListTy Atoms;
// For access to map/remap.
friend class MCAtom;
/// \brief Remap \p Atom to the given range, and update its Begin/End fields.
/// \param Atom An atom belonging to this module.
/// An atom should always use this method to update its bounds, because this
/// enables the owning MCModule to keep track of its atoms.
void remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd);
/// \brief Insert an atom in the module, using its Begin and End addresses.
void map(MCAtom *NewAtom);
/// @}
/// \name Basic block tracking
/// @{
typedef std::vector<MCBasicBlock*> BBsByAtomTy;
/// Basic blocks registered through trackBBForAtom() (non-owning; blocks are
/// owned by their MCFunction).
BBsByAtomTy BBsByAtom;
// For access to basic block > atom tracking.
friend class MCBasicBlock;
friend class MCTextAtom;
/// \brief Keep track of \p BBBackedByAtom as being backed by \p Atom.
/// This is used to update succs/preds when \p Atom is split.
void trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BBBackedByAtom);
/// Called by MCTextAtom::split after \p TA has been split and \p NewTA
/// created, so that the basic blocks backed by \p TA can be split too.
void splitBasicBlocksForAtom(const MCTextAtom *TA, const MCTextAtom *NewTA);
/// @}
/// \name Function tracking
/// @{
typedef std::vector<std::unique_ptr<MCFunction>> FunctionListTy;
/// Functions owned by this module.
FunctionListTy Functions;
/// @}
/// The address of the entrypoint function.
uint64_t Entrypoint;
// Non-copyable.
MCModule (const MCModule &) LLVM_DELETED_FUNCTION;
MCModule& operator=(const MCModule &) LLVM_DELETED_FUNCTION;
// MCObjectDisassembler creates MCModules.
friend class MCObjectDisassembler;
public:
MCModule();
~MCModule();
/// \name Create a new MCAtom covering the specified offset range.
/// @{
MCTextAtom *createTextAtom(uint64_t Begin, uint64_t End);
MCDataAtom *createDataAtom(uint64_t Begin, uint64_t End);
/// @}
/// \name Access to the owned atom list, ordered by begin address.
/// @{
const MCAtom *findAtomContaining(uint64_t Addr) const;
MCAtom *findAtomContaining(uint64_t Addr);
const MCAtom *findFirstAtomAfter(uint64_t Addr) const;
MCAtom *findFirstAtomAfter(uint64_t Addr);
typedef AtomListTy::const_iterator const_atom_iterator;
typedef AtomListTy:: iterator atom_iterator;
const_atom_iterator atom_begin() const { return Atoms.begin(); }
atom_iterator atom_begin() { return Atoms.begin(); }
const_atom_iterator atom_end() const { return Atoms.end(); }
atom_iterator atom_end() { return Atoms.end(); }
/// @}
/// \brief Create a new MCFunction.
MCFunction *createFunction(StringRef Name);
/// \name Access to the owned function list.
/// Iterators dereference to std::unique_ptr<MCFunction>.
/// @{
typedef FunctionListTy::const_iterator const_func_iterator;
typedef FunctionListTy:: iterator func_iterator;
const_func_iterator func_begin() const { return Functions.begin(); }
func_iterator func_begin() { return Functions.begin(); }
const_func_iterator func_end() const { return Functions.end(); }
func_iterator func_end() { return Functions.end(); }
/// @}
/// \brief Get the address of the entrypoint function, or 0 if there is none.
uint64_t getEntrypoint() const { return Entrypoint; }
};
}
#endif

View File

@ -1,40 +0,0 @@
//===- MCModuleYAML.h - MCModule YAMLIO implementation ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file declares classes for handling the YAML representation
/// of MCModule.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCANALYSIS_MCMODULEYAML_H
#define LLVM_MC_MCANALYSIS_MCMODULEYAML_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
class MCInstrInfo;
class MCRegisterInfo;
/// \brief Dump a YAML representation of the MCModule \p MCM to \p OS.
/// \param OS Stream the YAML text is written to.
/// \param MCM The module to serialize; it is not modified.
/// \param MII, MRI Target instruction and register descriptions.
/// NOTE(review): presumably used to print opcodes and registers by name -
/// confirm against the implementation.
/// \returns The empty string on success, an error message on failure.
StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM,
const MCInstrInfo &MII, const MCRegisterInfo &MRI);
/// \brief Creates a new module and returns it in \p MCM.
/// \param MCM Out-parameter receiving ownership of the new module.
/// \param YamlContent The YAML text to parse.
/// \param MII, MRI Target instruction and register descriptions.
/// \returns The empty string on success, an error message on failure.
StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent,
const MCInstrInfo &MII, const MCRegisterInfo &MRI);
} // end namespace llvm
#endif

View File

@ -1,83 +0,0 @@
//===-- MCObjectSymbolizer.h ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the MCObjectSymbolizer class, an MCSymbolizer that is
// backed by an object::ObjectFile.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCANALYSIS_MCOBJECTSYMBOLIZER_H
#define LLVM_MC_MCANALYSIS_MCOBJECTSYMBOLIZER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSymbolizer.h"
#include "llvm/Object/ObjectFile.h"
#include <vector>
namespace llvm {
class MCExpr;
class MCInst;
class MCRelocationInfo;
class raw_ostream;
/// \brief An ObjectFile-backed symbolizer.
///
/// Instances are obtained through createObjectSymbolizer(); the constructor is
/// protected.
class MCObjectSymbolizer : public MCSymbolizer {
protected:
/// The object file being symbolized (not owned).
const object::ObjectFile *Obj;
// Map a load address to the first relocation that applies there. As far as I
// know, if there are several relocations at the exact same address, they are
// related and the others can be determined from the first that was found in
// the relocation table. For instance, on x86-64 mach-o, a SUBTRACTOR
// relocation (referencing the minuend symbol) is followed by an UNSIGNED
// relocation (referencing the subtrahend symbol).
const object::RelocationRef *findRelocationAt(uint64_t Addr);
// Look up the section whose address range contains Addr.
// NOTE(review): presumably returns null when no section matches - confirm
// against the implementation.
const object::SectionRef *findSectionContaining(uint64_t Addr);
MCObjectSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
const object::ObjectFile *Obj);
public:
/// \name Overridden MCSymbolizer methods:
/// @{
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
int64_t Value, uint64_t Address,
bool IsBranch, uint64_t Offset,
uint64_t InstSize) override;
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value,
uint64_t Address) override;
/// @}
/// \brief Look for an external function symbol at \p Addr.
/// (References through the ELF PLT, Mach-O stubs, and similar).
/// \returns A StringRef for the external symbol.
/// NOTE(review): this comment previously said "an MCExpr ... or 0 if not
/// found", which does not match the declared StringRef return type;
/// presumably the symbol name is returned, and an empty StringRef when
/// nothing is found - confirm against the implementation.
virtual StringRef findExternalFunctionAt(uint64_t Addr);
/// \brief Create an object symbolizer for \p Obj.
/// Returns a raw pointer. NOTE(review): presumably the caller takes
/// ownership - confirm.
static MCObjectSymbolizer *
createObjectSymbolizer(MCContext &Ctx,
std::unique_ptr<MCRelocationInfo> RelInfo,
const object::ObjectFile *Obj);
private:
typedef DenseMap<uint64_t, object::RelocationRef> AddrToRelocMap;
typedef std::vector<object::SectionRef> SortedSectionList;
/// Section list used by findSectionContaining(); filled by buildSectionList().
SortedSectionList SortedSections;
/// Address-to-relocation map used by findRelocationAt(); filled by
/// buildRelocationByAddrMap().
AddrToRelocMap AddrToReloc;
void buildSectionList();
void buildRelocationByAddrMap();
};
}
#endif

View File

@ -1,174 +0,0 @@
//===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the MCObjectDisassembler class, which
// can be used to construct an MCModule and an MC CFG from an ObjectFile.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
#define LLVM_MC_MCOBJECTDISASSEMBLER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryObject.h"
#include <vector>
namespace llvm {
namespace object {
class ObjectFile;
class MachOObjectFile;
}
class MCBasicBlock;
class MCDisassembler;
class MCFunction;
class MCInstrAnalysis;
class MCModule;
class MCObjectSymbolizer;
/// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
/// This class builds on MCDisassembler to disassemble whole sections, creating
/// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data).
/// It can also be used to create a control flow graph consisting of MCFunctions
/// and MCBasicBlocks.
class MCObjectDisassembler {
public:
/// \brief Construct a disassembler over \p Obj.
/// All three arguments are stored as references, so they must outlive this
/// object.
MCObjectDisassembler(const object::ObjectFile &Obj,
const MCDisassembler &Dis,
const MCInstrAnalysis &MIA);
virtual ~MCObjectDisassembler() {}
/// \brief Build an MCModule, creating atoms and optionally functions.
/// \param withCFG Also build a CFG by adding MCFunctions to the Module.
/// If withCFG is false, the MCModule built only contains atoms, representing
/// what was found in the object file. If withCFG is true, MCFunctions are
/// created, containing MCBasicBlocks. All text atoms are split to form basic
/// block atoms, which then each back an MCBasicBlock.
/// Returns a raw pointer. NOTE(review): presumably the caller takes
/// ownership - confirm.
MCModule *buildModule(bool withCFG = false);
/// \brief Build a module without populating it from the object file.
/// NOTE(review): inferred from the name; the implementation is not visible
/// here - confirm.
MCModule *buildEmptyModule();
typedef std::vector<uint64_t> AddressSetTy;
/// \brief Create a new MCFunction.
/// \param Module The module the function is added to.
/// \param BeginAddr Address at which the function starts.
/// \param CallTargets, TailCallTargets Address lists passed by reference.
/// NOTE(review): presumably filled with the call / tail-call targets
/// discovered while building the function - confirm.
MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets);
/// \brief Set the region on which to fallback if disassembly was requested
/// somewhere not accessible in the object file.
/// This is used for dynamic disassembly (see RawMemoryObject).
/// Ownership of \p Region is transferred to this object.
void setFallbackRegion(std::unique_ptr<MemoryObject> Region) {
FallbackRegion = std::move(Region);
}
/// \brief Set the symbolizer to use to get information on external functions.
/// Note that this isn't used to do instruction-level symbolization (that is,
/// plugged into MCDisassembler), but to symbolize function call targets.
/// The pointer is stored as-is; the symbolizer is not owned.
void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) {
MOS = ObjectSymbolizer;
}
/// \brief Get the effective address of the entrypoint, or 0 if there is none.
virtual uint64_t getEntrypoint();
/// \name Get the addresses of static constructors/destructors in the object.
/// The caller is expected to know how to interpret the addresses;
/// for example, Mach-O init functions expect 5 arguments, not for ELF.
/// The addresses are original object file load addresses, not effective.
/// @{
virtual ArrayRef<uint64_t> getStaticInitFunctions();
virtual ArrayRef<uint64_t> getStaticExitFunctions();
/// @}
/// \name Translation between effective and objectfile load address.
/// @{
/// \brief Compute the effective load address, from an objectfile virtual
/// address. This is implemented in a format-specific way, to take into
/// account things like PIE/ASLR when doing dynamic disassembly.
/// For example, on Mach-O this would be done by adding the VM addr slide,
/// on glibc ELF by keeping a map between segment load addresses, filled
/// using dl_iterate_phdr, etc..
/// In most static situations and in the default impl., this returns \p Addr.
virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);
/// \brief Compute the original load address, as specified in the objectfile.
/// This is the inverse of getEffectiveLoadAddr.
virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
/// @}
protected:
const object::ObjectFile &Obj;
const MCDisassembler &Dis;
const MCInstrAnalysis &MIA;
/// Symbolizer set through setSymbolizer(); not owned.
MCObjectSymbolizer *MOS;
/// \brief The fallback memory region, outside the object file.
std::unique_ptr<MemoryObject> FallbackRegion;
/// \brief Return a memory region suitable for reading starting at \p Addr.
/// In most cases, this returns a StringRefMemoryObject backed by the
/// containing section. When no section was found, this returns the
/// FallbackRegion, if it is suitable.
/// If it is not, or if there is no fallback region, this returns 0.
MemoryObject *getRegionFor(uint64_t Addr);
private:
/// \brief Fill \p Module by creating an atom for each section.
/// This could be made much smarter, using information like symbols, but also
/// format-specific features, like mach-o function_start or data_in_code LCs.
void buildSectionAtoms(MCModule *Module);
/// \brief Enrich \p Module with a CFG consisting of MCFunctions.
/// \param Module An MCModule returned by buildModule, with no CFG.
/// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
/// When the CFG is built, contiguous instructions that were previously in a
/// single MCTextAtom will be split in multiple basic block atoms.
void buildCFG(MCModule *Module);
/// \brief Get the basic block of \p MCFN that starts at \p BeginAddr.
/// NOTE(review): presumably creates the block (and disassembles) when it
/// does not exist yet, recording discovered targets in \p CallTargets and
/// \p TailCallTargets - confirm against the implementation.
MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets);
};
/// \brief Mach-O specialization of MCObjectDisassembler.
/// It overrides the address-translation, entrypoint and static init/exit
/// hooks of the base class. Note that the overrides are declared protected
/// here although they are public in the base class.
class MCMachOObjectDisassembler : public MCObjectDisassembler {
/// The Mach-O object file being disassembled (stored by reference).
const object::MachOObjectFile &MOOF;
/// See the constructor's \p VMAddrSlide parameter.
uint64_t VMAddrSlide;
/// See the constructor's \p HeaderLoadAddress parameter.
uint64_t HeaderLoadAddress;
// __DATA;__mod_init_func support.
llvm::StringRef ModInitContents;
// __DATA;__mod_exit_func support.
llvm::StringRef ModExitContents;
public:
/// \brief Construct a Mach-O specific object disassembler.
/// \param VMAddrSlide The virtual address slide applied by dyld.
/// \param HeaderLoadAddress The load address of the mach_header for this
/// object.
MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF,
const MCDisassembler &Dis,
const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
uint64_t HeaderLoadAddress);
protected:
uint64_t getEffectiveLoadAddr(uint64_t Addr) override;
uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) override;
uint64_t getEntrypoint() override;
ArrayRef<uint64_t> getStaticInitFunctions() override;
ArrayRef<uint64_t> getStaticExitFunctions() override;
};
}
#endif

View File

@ -108,9 +108,6 @@ module LLVM_MC {
umbrella "MC"
module * { export * }
// FIXME: Excluded due to mislayering.
exclude header "MC/MCAnalysis/MCObjectSymbolizer.h"
// Exclude this; it's fundamentally non-modular.
exclude header "MC/MCTargetOptionsCommandFlags.h"
}
@ -119,8 +116,6 @@ module LLVM_Object {
requires cplusplus
umbrella "Object"
module * { export * }
module LLVM_MC_MCAnalysis_MCObjectSymbolizer { header "MC/MCAnalysis/MCObjectSymbolizer.h" export * }
}
module LLVM_Option { requires cplusplus umbrella "Option" module * { export * } }

View File

@ -48,6 +48,5 @@ add_llvm_library(LLVMMC
YAML.cpp
)
add_subdirectory(MCAnalysis)
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)

View File

@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
subdirectories = MCAnalysis MCDisassembler MCParser
subdirectories = MCDisassembler MCParser
[component_0]
type = Library

View File

@ -1,8 +0,0 @@
add_llvm_library(LLVMMCAnalysis
MCAtom.cpp
MCFunction.cpp
MCModule.cpp
MCModuleYAML.cpp
MCObjectDisassembler.cpp
MCObjectSymbolizer.cpp
)

View File

@ -1,5 +0,0 @@
[component_0]
type = Library
name = MCAnalysis
parent = Libraries
required_libraries = MC Object Support

View File

@ -1,114 +0,0 @@
//===- lib/MC/MCAtom.cpp - MCAtom implementation --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/Support/ErrorHandling.h"
#include <iterator>
using namespace llvm;
// Pin the vtable to this file: anchor() is MCAtom's out-of-line virtual
// method, so it is defined here and intentionally does nothing.
void MCAtom::anchor() {}
// Change this atom's bounds to [NewBegin, NewEnd]. The update is delegated to
// the owning module, which is what records the new range.
void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
  MCModule *const Owner = Parent;
  Owner->remap(this, NewBegin, NewEnd);
}
void MCAtom::remapForTruncate(uint64_t TruncPt) {
assert((TruncPt >= Begin && TruncPt < End) &&
"Truncation point not contained in atom!");
remap(Begin, TruncPt);
}
void MCAtom::remapForSplit(uint64_t SplitPt,
uint64_t &LBegin, uint64_t &LEnd,
uint64_t &RBegin, uint64_t &REnd) {
assert((SplitPt > Begin && SplitPt <= End) &&
"Splitting at point not contained in atom!");
// Compute the new begin/end points.
LBegin = Begin;
LEnd = SplitPt - 1;
RBegin = SplitPt;
REnd = End;
// Remap this atom to become the lower of the two new ones.
remap(LBegin, LEnd);
}
// MCDataAtom
// Append one data entry. When the stored entries no longer fit in the closed
// range [Begin, End], the atom is grown by one address.
void MCDataAtom::addData(const MCData &D) {
  Data.push_back(D);

  const uint64_t AddressesCovered = End + 1 - Begin;
  if (Data.size() <= AddressesCovered)
    return;

  remap(Begin, End + 1);
}
// Make TruncPt the last address of this atom and resize the stored data to
// one entry per address in [Begin, TruncPt].
void MCDataAtom::truncate(uint64_t TruncPt) {
  remapForTruncate(TruncPt);
  // Truncation leaves Begin unchanged, so this is the size of the new range.
  const uint64_t EntriesKept = TruncPt - Begin + 1;
  Data.resize(EntriesKept);
}
// Split this data atom at SplitPt. This atom keeps the entries below SplitPt;
// a new atom, created in the same module and given the same name, receives
// the entries from SplitPt onwards and is returned.
MCDataAtom *MCDataAtom::split(uint64_t SplitPt) {
  uint64_t LeftBegin, LeftEnd, RightBegin, RightEnd;
  remapForSplit(SplitPt, LeftBegin, LeftEnd, RightBegin, RightEnd);

  MCDataAtom *RightAtom = Parent->createDataAtom(RightBegin, RightEnd);
  RightAtom->setName(getName());

  // The first entry of the right half sits (RightBegin - LeftBegin) entries
  // into our storage.
  std::vector<MCData>::iterator SplitIt =
      Data.begin() + (RightBegin - LeftBegin);
  assert(SplitIt != Data.end() && "Split point not found in range!");

  // Append the tail to the new atom, then drop it from this one.
  RightAtom->Data.insert(RightAtom->Data.end(), SplitIt, Data.end());
  Data.erase(SplitIt, Data.end());
  return RightAtom;
}
// MCTextAtom
void MCTextAtom::addInst(const MCInst &I, uint64_t Size) {
if (NextInstAddress + Size - 1 > End)
remap(Begin, NextInstAddress + Size - 1);
Insts.push_back(MCDecodedInst(I, NextInstAddress, Size));
NextInstAddress += Size;
}
void MCTextAtom::truncate(uint64_t TruncPt) {
remapForTruncate(TruncPt);
InstListTy::iterator I = Insts.begin();
while (I != Insts.end() && I->Address <= TruncPt) ++I;
assert(I != Insts.end() && "Truncation point not found in disassembly!");
assert(I->Address == TruncPt + 1 &&
"Truncation point does not fall on instruction boundary");
Insts.erase(I, Insts.end());
}
// Split this text atom at SplitPt, which must be the start address of one of
// its instructions.
//
// This atom keeps the instructions below SplitPt. A new atom, created in the
// same module and given the same name, receives the instructions from SplitPt
// onwards. The owning module is then told about the split so that it can
// split the basic blocks backed by this atom. Returns the new atom.
MCTextAtom *MCTextAtom::split(uint64_t SplitPt) {
  uint64_t LBegin, LEnd, RBegin, REnd;
  remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd);

  MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd);
  RightAtom->setName(getName());

  // Find the first instruction that belongs to the right-hand atom.
  InstListTy::iterator I = Insts.begin();
  while (I != Insts.end() && I->Address < SplitPt) ++I;
  assert(I != Insts.end() && "Split point not found in disassembly!");
  assert(I->Address == SplitPt &&
         "Split point does not fall on instruction boundary!");

  // Keep both atoms' NextInstAddress equal to "the address immediately after
  // the last instruction in the atom". The right atom inherits our old append
  // position (it now holds our last instruction), and ours moves back to
  // where the first moved instruction started. Without this, a later
  // addInst() on either atom would append at a wrong address.
  if (I != Insts.end()) {
    RightAtom->NextInstAddress = NextInstAddress;
    NextInstAddress = I->Address;
  }

  std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts));
  Insts.erase(I, Insts.end());
  Parent->splitBasicBlocksForAtom(this, RightAtom);
  return RightAtom;
}

View File

@ -1,76 +0,0 @@
//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAnalysis/MCFunction.h"
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCModule.h"
#include <algorithm>
using namespace llvm;
// MCFunction
/// Construct a function called \p Name, recording \p Parent as its parent
/// module. No basic blocks are created here.
MCFunction::MCFunction(StringRef Name, MCModule *Parent)
: Name(Name), ParentModule(Parent)
{}
/// Create a basic block backed by the text atom \p TA, append it to this
/// function's block list (which owns it), and return a reference to it.
MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
  Blocks.push_back(std::unique_ptr<MCBasicBlock>(new MCBasicBlock(TA, this)));
  return *Blocks.back();
}
/// Linear search for the basic block whose backing atom begins at
/// \p StartAddr. Returns nullptr when this function has no such block.
MCBasicBlock *MCFunction::find(uint64_t StartAddr) {
  const_iterator Cur = begin();
  const const_iterator Last = end();
  while (Cur != Last) {
    if ((*Cur)->getInsts()->getBeginAddr() == StartAddr)
      return Cur->get();
    ++Cur;
  }
  return nullptr;
}
/// Const overload: forwards to the non-const find() and returns its result
/// as a pointer-to-const.
const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const {
  MCFunction *MutableThis = const_cast<MCFunction *>(this);
  return MutableThis->find(StartAddr);
}
// MCBasicBlock
/// Construct a basic block backed by the text atom \p Insts inside function
/// \p Parent, and register the new block with the enclosing module so the
/// module can find it from its atom.
MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent)
    : Insts(&Insts), Parent(Parent) {
  MCModule *OwningModule = getParent()->getParent();
  OwningModule->trackBBForAtom(&Insts, this);
}
/// Record \p MCBB as a successor of this block; a block that is already
/// listed is not added a second time.
void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) {
  if (isSuccessor(MCBB))
    return;
  Successors.push_back(MCBB);
}
/// Return true when \p MCBB appears in this block's successor list.
bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const {
  const auto Last = Successors.end();
  return std::find(Successors.begin(), Last, MCBB) != Last;
}
/// Record \p MCBB as a predecessor of this block; a block that is already
/// listed is not added a second time.
void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) {
  if (isPredecessor(MCBB))
    return;
  Predecessors.push_back(MCBB);
}
/// Return true when \p MCBB appears in this block's predecessor list.
bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const {
  const auto Last = Predecessors.end();
  return std::find(Predecessors.begin(), Last, MCBB) != Last;
}
/// Rewire the CFG edges after this block's atom has been split in two.
///
/// \p SplitBB is the block backed by the atom that immediately follows this
/// block's atom. It inherits all of this block's successors, gains this block
/// as a predecessor, and becomes this block's only successor.
void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) {
  // Preconditions: the two atoms are adjacent and the new block is fresh.
  assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() &&
         "Splitting unrelated basic blocks!");
  assert(SplitBB->Successors.empty() &&
         "Split basic block shouldn't already have successors!");

  SplitBB->addPredecessor(this);

  // Move the outgoing edges to the new block, then fall through into it.
  SplitBB->Successors = Successors;
  Successors.clear();
  addSuccessor(SplitBB);
}

View File

@ -1,142 +0,0 @@
//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCFunction.h"
#include <algorithm>
using namespace llvm;
/// Comparator for std::lower_bound over the atom list: true when atom \p L
/// ends strictly before \p Addr.
static bool AtomComp(const MCAtom *L, uint64_t Addr) {
  return Addr > L->getEndAddr();
}
/// Comparator for std::upper_bound over the atom list: true when \p Addr is
/// strictly below the end address of atom \p R.
static bool AtomCompInv(uint64_t Addr, const MCAtom *R) {
  return R->getEndAddr() > Addr;
}
/// Insert \p NewAtom into the atom list at the position found by a binary
/// search on its begin address. Asserts that the atom found at that position,
/// if any, starts after the new atom's end.
void MCModule::map(MCAtom *NewAtom) {
  const uint64_t NewBegin = NewAtom->Begin;
  assert(NewBegin <= NewAtom->End &&
         "Creating MCAtom with endpoints reversed?");

  // First existing atom whose end is not below the new begin address.
  AtomListTy::iterator InsertPos =
      std::lower_bound(atom_begin(), atom_end(), NewBegin, AtomComp);
  assert((InsertPos == atom_end() ||
          NewAtom->End < (*InsertPos)->getBeginAddr()) &&
         "Offset range already occupied!");

  Atoms.insert(InsertPos, NewAtom);
}
/// Allocate a text atom covering [\p Begin, \p End] and insert it into this
/// module's atom list. The module's destructor deletes every listed atom.
MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) {
  MCTextAtom *const TextAtom = new MCTextAtom(this, Begin, End);
  map(TextAtom);
  return TextAtom;
}
/// Allocate a data atom covering [\p Begin, \p End] and insert it into this
/// module's atom list. The module's destructor deletes every listed atom.
MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) {
  MCDataAtom *const DataAtom = new MCDataAtom(this, Begin, End);
  map(DataAtom);
  return DataAtom;
}
// remap - Update the interval mapping for an atom.
//
// Removes \p Atom from the sorted atom list, re-inserts it at the position
// matching \p NewBegin, and then stores [\p NewBegin, \p NewEnd] as the atom's
// bounds. Asserts that the atom was correctly mapped beforehand and that the
// new range does not run into the atom that follows it.
void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) {
  // Find and erase the old mapping.
  AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(),
                                            Atom->Begin, AtomComp);
  assert(I != atom_end() && "Atom offset not found in module!");
  assert(*I == Atom && "Previous atom mapping was invalid!");
  Atoms.erase(I);

  // FIXME: special case NewBegin == Atom->Begin

  // Insert the new mapping.
  AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(),
                                               NewBegin, AtomComp);
  // The overlap check has to use the *new* end address: Atom->End still holds
  // the old bound here, so testing it would let a grown atom silently overlap
  // its neighbour.
  assert((NewI == atom_end() || (*NewI)->getBeginAddr() > NewEnd)
         && "Offset range already occupied!");
  Atoms.insert(NewI, Atom);

  // Update the atom internal bounds.
  Atom->Begin = NewBegin;
  Atom->End = NewEnd;
}
/// Return the atom whose [begin, end] range contains \p Addr, or nullptr when
/// no atom in the list covers that address.
const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
  // First atom that does not end before Addr; it contains Addr only if it
  // also begins at or before it.
  AtomListTy::const_iterator Candidate =
      std::lower_bound(atom_begin(), atom_end(), Addr, AtomComp);
  if (Candidate == atom_end())
    return nullptr;
  return (*Candidate)->getBeginAddr() <= Addr ? *Candidate : nullptr;
}
/// Non-const overload: forwards to the const findAtomContaining() and strips
/// the constness from its result.
MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
  const MCModule *ConstThis = this;
  const MCAtom *Found = ConstThis->findAtomContaining(Addr);
  return const_cast<MCAtom *>(Found);
}
/// Return the first atom in the list whose end address is strictly greater
/// than \p Addr, or nullptr when there is none.
const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const {
  AtomListTy::const_iterator Next =
      std::upper_bound(atom_begin(), atom_end(), Addr, AtomCompInv);
  return Next != atom_end() ? *Next : nullptr;
}
/// Non-const overload: forwards to the const findFirstAtomAfter() and strips
/// the constness from its result.
MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
  const MCModule *ConstThis = this;
  const MCAtom *Found = ConstThis->findFirstAtomAfter(Addr);
  return const_cast<MCAtom *>(Found);
}
/// Create a function called \p Name, append it to this module's function
/// list (which owns it), and return a non-owning pointer to it.
MCFunction *MCModule::createFunction(StringRef Name) {
  Functions.push_back(std::unique_ptr<MCFunction>(new MCFunction(Name, this)));
  return Functions.back().get();
}
/// Comparator for std::lower_bound over BBsByAtom: orders a basic block
/// before \p Atom when the pointer to its backing atom compares lower.
static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) {
  return Atom > BB->getInsts();
}
void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA,
const MCTextAtom *NewTA) {
BBsByAtomTy::iterator
I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(),
TA, CompBBToAtom);
for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) {
MCBasicBlock *BB = *I;
MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA);
BB->splitBasicBlock(NewBB);
}
}
void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) {
assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!");
BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(),
BBsByAtom.end(),
Atom, CompBBToAtom);
for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I)
if (*I == BB)
return;
BBsByAtom.insert(I, BB);
}
/// Construct an empty module whose entrypoint address is 0.
MCModule::MCModule() : Entrypoint(0) { }
/// Delete every atom in the atom list; atoms are allocated with `new` by
/// createTextAtom() / createDataAtom().
MCModule::~MCModule() {
  AtomListTy::iterator Cur = atom_begin();
  const AtomListTy::iterator Last = atom_end();
  while (Cur != Last) {
    delete *Cur;
    ++Cur;
  }
}

View File

@ -1,464 +0,0 @@
//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines classes for handling the YAML representation of MCModule.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAnalysis/MCModuleYAML.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCFunction.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/YAML.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/YAMLTraits.h"
#include <vector>
namespace llvm {
namespace {
// This class is used to map opcode and register names to enum values.
//
// There are at least 3 obvious ways to do this:
// 1- Generate an MII/MRI method using a tablegen StringMatcher
// 2- Write an MII/MRI method using std::lower_bound and the assumption that
// the enums are sorted (starting at a fixed value).
// 3- Do the matching manually as is done here.
//
// Why 3?
// 1- A StringMatcher function for thousands of entries would incur
// a non-negligible binary size overhead.
// 2- The lower_bound comparators would be somewhat involved and aren't
// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h)
// 3- This isn't actually something useful outside tests (but the same argument
// can be made against having {MII,MRI}::getName).
//
// If this becomes useful outside this specific situation, feel free to do
// the Right Thing (tm) and move the functionality to MII/MRI.
//
class InstrRegInfoHolder {
typedef StringMap<unsigned, BumpPtrAllocator> EnumValByNameTy;
EnumValByNameTy InstEnumValueByName;
EnumValByNameTy RegEnumValueByName;
public:
const MCInstrInfo &MII;
const MCRegisterInfo &MRI;
InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI)
: InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())),
RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) {
for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i)
InstEnumValueByName[MII.getName(i)] = i;
for (int i = 0, e = MRI.getNumRegs(); i != e; ++i)
RegEnumValueByName[MRI.getName(i)] = i;
}
bool matchRegister(StringRef Name, unsigned &Reg) {
EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name);
if (It == RegEnumValueByName.end())
return false;
Reg = It->getValue();
return true;
}
bool matchOpcode(StringRef Name, unsigned &Opc) {
EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name);
if (It == InstEnumValueByName.end())
return false;
Opc = It->getValue();
return true;
}
};
} // end unnamed namespace
// Plain data structures mirroring the YAML document layout of an MCModule.
// They are filled either from an MCModule (when dumping) or by YAML I/O (when
// parsing).
namespace MCModuleYAML {
// Distinct type for opcodes so they get their own ScalarTraits (printed and
// parsed by name instead of as a raw number).
LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum)
// A single instruction operand.
struct Operand {
MCOperand MCOp;
};
// One decoded instruction: opcode, operands and its size.
struct Inst {
OpcodeEnum Opcode;
std::vector<Operand> Operands;
uint64_t Size;
};
// One atom. Insts is mapped for text atoms, Data for data atoms.
struct Atom {
MCAtom::AtomKind Type;
yaml::Hex64 StartAddress;
uint64_t Size;
std::vector<Inst> Insts;
yaml::BinaryRef Data;
};
// One basic block, identified by the begin address of its atom; Preds and
// Succs hold the begin addresses of neighbouring blocks.
struct BasicBlock {
yaml::Hex64 Address;
std::vector<yaml::Hex64> Preds;
std::vector<yaml::Hex64> Succs;
};
// One function: a name and its basic blocks.
struct Function {
StringRef Name;
std::vector<BasicBlock> BasicBlocks;
};
// Top-level document: all atoms plus (optionally) the functions.
struct Module {
std::vector<Atom> Atoms;
std::vector<Function> Functions;
};
} // end namespace MCModuleYAML
} // end namespace llvm
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function)
namespace llvm {
namespace yaml {
// YAML I/O trait specializations for the MCModuleYAML types. Only the
// declarations live here; the member functions are defined further below.

// Atom kinds are (de)serialized as enumeration strings.
template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> {
static void enumeration(IO &IO, MCAtom::AtomKind &Kind);
};
// Mapping traits: each of these types is a YAML mapping.
template <> struct MappingTraits<MCModuleYAML::Atom> {
static void mapping(IO &IO, MCModuleYAML::Atom &A);
};
template <> struct MappingTraits<MCModuleYAML::Inst> {
static void mapping(IO &IO, MCModuleYAML::Inst &I);
};
template <> struct MappingTraits<MCModuleYAML::BasicBlock> {
static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB);
};
template <> struct MappingTraits<MCModuleYAML::Function> {
static void mapping(IO &IO, MCModuleYAML::Function &Fn);
};
template <> struct MappingTraits<MCModuleYAML::Module> {
static void mapping(IO &IO, MCModuleYAML::Module &M);
};
// Scalar traits: operands and opcodes are single YAML scalars. The void*
// argument is the context pointer, which the definitions cast to
// InstrRegInfoHolder*.
template <> struct ScalarTraits<MCModuleYAML::Operand> {
static void output(const MCModuleYAML::Operand &, void *,
llvm::raw_ostream &);
static StringRef input(StringRef, void *, MCModuleYAML::Operand &);
static bool mustQuote(StringRef) { return false; }
};
template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> {
static void output(const MCModuleYAML::OpcodeEnum &, void *,
llvm::raw_ostream &);
static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &);
static bool mustQuote(StringRef) { return false; }
};
// Map the atom kinds to the YAML strings "Text" and "Data".
void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration(
IO &IO, MCAtom::AtomKind &Value) {
IO.enumCase(Value, "Text", MCAtom::TextAtom);
IO.enumCase(Value, "Data", MCAtom::DataAtom);
}
// Map an atom: "StartAddress", "Size" and "Type" are always required, and the
// "Content" key holds instructions for text atoms or raw bytes for data atoms.
void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) {
  IO.mapRequired("StartAddress", A.StartAddress);
  IO.mapRequired("Size", A.Size);
  IO.mapRequired("Type", A.Type);

  // The payload under "Content" depends on the kind that was just mapped.
  if (A.Type == MCAtom::TextAtom) {
    IO.mapRequired("Content", A.Insts);
    return;
  }
  if (A.Type == MCAtom::DataAtom)
    IO.mapRequired("Content", A.Data);
}
// Map an instruction: required keys "Inst" (opcode), "Size" and "Ops"
// (operand list), in that order.
void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO, MCModuleYAML::Inst &I) {
IO.mapRequired("Inst", I.Opcode);
IO.mapRequired("Size", I.Size);
IO.mapRequired("Ops", I.Operands);
}
// Map a basic block: required keys "Address", "Preds" and "Succs".
void
MappingTraits<MCModuleYAML::BasicBlock>::mapping(IO &IO,
MCModuleYAML::BasicBlock &BB) {
IO.mapRequired("Address", BB.Address);
IO.mapRequired("Preds", BB.Preds);
IO.mapRequired("Succs", BB.Succs);
}
// Map a function: required keys "Name" and "BasicBlocks".
void MappingTraits<MCModuleYAML::Function>::mapping(IO &IO,
MCModuleYAML::Function &F) {
IO.mapRequired("Name", F.Name);
IO.mapRequired("BasicBlocks", F.BasicBlocks);
}
// Map the top-level module: "Atoms" is required, "Functions" is optional.
void MappingTraits<MCModuleYAML::Module>::mapping(IO &IO,
MCModuleYAML::Module &M) {
IO.mapRequired("Atoms", M.Atoms);
IO.mapOptional("Functions", M.Functions);
}
// Print an operand as a scalar: 'I' followed by the value for immediates, 'R'
// followed by the register name for registers. \p Ctx is the
// InstrRegInfoHolder used to resolve register names.
void ScalarTraits<MCModuleYAML::Operand>::output(
    const MCModuleYAML::Operand &Val, void *Ctx, raw_ostream &Out) {
  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
  const MCOperand &Op = Val.MCOp;

  // FIXME: Doesn't support FPImm and expr/inst, but do these make sense?
  if (Op.isImm()) {
    Out << "I" << Op.getImm();
    return;
  }
  if (Op.isReg()) {
    Out << "R" << IRI->MRI.getName(Op.getReg());
    return;
  }
  llvm_unreachable("Trying to output invalid MCOperand!");
}
// Parse an operand scalar into \p Val.
//
// The first character selects the operand kind: 'R' is followed by a register
// name (resolved through the InstrRegInfoHolder passed as \p Ctx), 'I' by a
// base-10 signed immediate. Returns an empty StringRef on success and an error
// message otherwise; \p Val is left untouched on error.
StringRef
ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx,
                                           MCModuleYAML::Operand &Val) {
  InstrRegInfoHolder *IRI = static_cast<InstrRegInfoHolder *>(Ctx);
  // An empty scalar has no kind prefix and is rejected below.
  const char Type = Scalar.empty() ? '\0' : Scalar.front();

  if (Type == 'R') {
    unsigned Reg;
    if (!IRI->matchRegister(Scalar.substr(1), Reg))
      return "Invalid register name.";
    Val.MCOp = MCOperand::CreateReg(Reg);
    return StringRef();
  }

  if (Type == 'I') {
    int64_t RIVal;
    // getAsInteger() returns true on failure.
    if (Scalar.substr(1).getAsInteger(10, RIVal))
      return "Invalid immediate value.";
    Val.MCOp = MCOperand::CreateImm(RIVal);
    return StringRef();
  }

  // Any other prefix is an error; the former trailing branch that stored a
  // default MCOperand could never be reached and has been removed.
  return "Operand must start with 'R' (register) or 'I' (immediate).";
}
// Print an opcode as its name, resolved through the InstrRegInfoHolder that
// is passed as \p Ctx.
void ScalarTraits<MCModuleYAML::OpcodeEnum>::output(
    const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) {
  InstrRegInfoHolder *Holder = static_cast<InstrRegInfoHolder *>(Ctx);
  Out << Holder->MII.getName(Val);
}
// Parse an opcode name into \p Val using the InstrRegInfoHolder passed as
// \p Ctx. Returns an empty string on success, an error message otherwise.
StringRef
ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx,
                                              MCModuleYAML::OpcodeEnum &Val) {
  InstrRegInfoHolder *Holder = static_cast<InstrRegInfoHolder *>(Ctx);
  unsigned Opc;
  if (Holder->matchOpcode(Scalar, Opc)) {
    Val = Opc;
    return "";
  }
  return "Invalid instruction opcode.";
}
} // end namespace yaml
namespace {
// Converts an MCModule into its MCModuleYAML::Module representation. The
// conversion runs in the constructor; the result is read back through
// getYAMLModule().
class MCModule2YAML {
const MCModule &MCM;
MCModuleYAML::Module YAMLModule;
// Helpers appending one atom / one function to YAMLModule.
void dumpAtom(const MCAtom *MCA);
void dumpFunction(const MCFunction &MCF);
// NOTE(review): no definition of dumpBasicBlock is visible in this part of
// the file - confirm whether it is used at all.
void dumpBasicBlock(const MCBasicBlock *MCBB);
public:
MCModule2YAML(const MCModule &MCM);
MCModuleYAML::Module &getYAMLModule();
};
// Populates an existing MCModule from a parsed MCModuleYAML::Module.
class YAML2MCModule {
MCModule &MCM;
public:
YAML2MCModule(MCModule &MCM);
// Returns an empty string on success, an error message otherwise.
StringRef parse(const MCModuleYAML::Module &YAMLModule);
};
} // end unnamed namespace
// Convert \p MCM eagerly: first every atom, then every function, each in the
// module's iteration order.
MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() {
  MCModule::const_atom_iterator AtomIt = MCM.atom_begin();
  const MCModule::const_atom_iterator AtomEnd = MCM.atom_end();
  while (AtomIt != AtomEnd) {
    dumpAtom(*AtomIt);
    ++AtomIt;
  }

  MCModule::const_func_iterator FuncIt = MCM.func_begin();
  const MCModule::const_func_iterator FuncEnd = MCM.func_end();
  while (FuncIt != FuncEnd) {
    dumpFunction(**FuncIt);
    ++FuncIt;
  }
}
void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1);
MCModuleYAML::Atom &A = YAMLModule.Atoms.back();
A.Type = MCA->getKind();
A.StartAddress = MCA->getBeginAddr();
A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1;
if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) {
const size_t InstCount = TA->size();
A.Insts.resize(InstCount);
for (size_t i = 0; i != InstCount; ++i) {
const MCDecodedInst &MCDI = TA->at(i);
A.Insts[i].Opcode = MCDI.Inst.getOpcode();
A.Insts[i].Size = MCDI.Size;
const unsigned OpCount = MCDI.Inst.getNumOperands();
A.Insts[i].Operands.resize(OpCount);
for (unsigned oi = 0; oi != OpCount; ++oi)
A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi);
}
} else if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) {
A.Data = DA->getData();
} else {
llvm_unreachable("Unknown atom type.");
}
}
void MCModule2YAML::dumpFunction(const MCFunction &MCF) {
YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1);
MCModuleYAML::Function &F = YAMLModule.Functions.back();
F.Name = MCF.getName();
for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end();
BBI != BBE; ++BBI) {
const MCBasicBlock &MCBB = **BBI;
F.BasicBlocks.resize(F.BasicBlocks.size() + 1);
MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back();
BB.Address = MCBB.getInsts()->getBeginAddr();
for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(),
PE = MCBB.pred_end();
PI != PE; ++PI)
BB.Preds.push_back((*PI)->getInsts()->getBeginAddr());
for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(),
SE = MCBB.succ_end();
SI != SE; ++SI)
BB.Succs.push_back((*SI)->getInsts()->getBeginAddr());
}
}
// Return the YAML module that was filled in by the constructor.
MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; }
// Remember the module that parse() will populate.
YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {}
// Populate the target MCModule from \p YAMLModule.
//
// The first pass creates one text or data atom per YAML atom and fills in its
// instructions or bytes. The second pass creates the functions, one basic
// block per listed block address (which must be the start address of a text
// atom), and finally wires up predecessor and successor edges.
//
// Returns an empty string on success, otherwise a message describing the
// first problem found. Every returned message is a string literal, so the
// result stays valid after the call.
StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) {
  typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt;
  typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt;
  typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt;

  typedef DenseMap<uint64_t, MCTextAtom *> AddrToTextAtomTy;
  // Text atoms by start address, used to resolve basic block addresses.
  AddrToTextAtomTy TAByAddr;

  for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end();
       AI != AE; ++AI) {
    uint64_t StartAddress = AI->StartAddress;
    if (AI->Size == 0)
      return "Atoms can't be empty!";
    // Reject ranges whose last byte would wrap around the address space; the
    // end address computed below would otherwise be smaller than the start.
    if (AI->Size - 1 > ~uint64_t(0) - StartAddress)
      return "Atom range overflows the address space!";
    uint64_t EndAddress = StartAddress + AI->Size - 1;
    switch (AI->Type) {
    case MCAtom::TextAtom: {
      MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress);
      TAByAddr[StartAddress] = TA;
      for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE;
           ++II) {
        MCInst MI;
        MI.setOpcode(II->Opcode);
        for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE;
             ++OI)
          MI.addOperand(OI->MCOp);
        TA->addInst(MI, II->Size);
      }
      break;
    }
    case MCAtom::DataAtom: {
      MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress);
      // Decode the YAML binary blob into raw bytes, then append them.
      SmallVector<char, 64> Data;
      raw_svector_ostream OS(Data);
      AI->Data.writeAsBinary(OS);
      OS.flush();
      for (size_t i = 0, e = Data.size(); i != e; ++i)
        DA->addData((uint8_t)Data[i]);
      break;
    }
    }
  }

  typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt;
  typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt;
  typedef std::vector<yaml::Hex64>::const_iterator AddrIt;
  for (FuncIt FI = YAMLModule.Functions.begin(),
              FE = YAMLModule.Functions.end();
       FI != FE; ++FI) {
    MCFunction *MCFN = MCM.createFunction(FI->Name);
    // Create all the blocks first so that edges can refer to any of them.
    for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
         BBI != BBE; ++BBI) {
      AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address);
      if (It == TAByAddr.end())
        return "Basic block start address doesn't match any text atom!";
      MCFN->createBlock(*It->second);
    }
    // Then connect predecessors and successors.
    for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end();
         BBI != BBE; ++BBI) {
      MCBasicBlock *MCBB = MCFN->find(BBI->Address);
      if (!MCBB)
        return "Couldn't find matching basic block in function.";
      for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE;
           ++PI) {
        MCBasicBlock *Pred = MCFN->find(*PI);
        if (!Pred)
          return "Couldn't find predecessor basic block.";
        MCBB->addPredecessor(Pred);
      }
      for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE;
           ++SI) {
        MCBasicBlock *Succ = MCFN->find(*SI);
        if (!Succ)
          return "Couldn't find successor basic block.";
        MCBB->addSuccessor(Succ);
      }
    }
  }
  return "";
}
// Write \p MCM to \p OS as YAML. \p MII and \p MRI supply the opcode and
// register names used in the output. Always returns an empty string.
StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM,
                        const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
  MCModule2YAML Dumper(MCM);
  InstrRegInfoHolder IRI(MII, MRI);

  // The holder reaches the scalar traits through the YAML context pointer.
  yaml::Output YOut(OS, static_cast<void *>(&IRI));
  YOut << Dumper.getYAMLModule();
  return "";
}
// Parse \p YamlContent into a freshly allocated MCModule stored in \p MCM.
// \p MII and \p MRI are used to resolve the opcode and register names that
// appear in the document.
//
// Returns an empty string on success and an error message otherwise. Every
// returned message refers to a string literal, so it stays valid after the
// call. \p MCM is reset to a new module even when an error is returned.
StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent,
                        const MCInstrInfo &MII, const MCRegisterInfo &MRI) {
  MCM.reset(new MCModule);
  YAML2MCModule Parser(*MCM);
  MCModuleYAML::Module YAMLModule;
  InstrRegInfoHolder IRI(MII, MRI);
  yaml::Input YIn(YamlContent, (void *)&IRI);
  YIn >> YAMLModule;
  // std::error_code::message() returns a std::string by value; handing it out
  // as a StringRef would leave the caller with a dangling pointer, so a fixed
  // literal is returned instead.
  if (YIn.error())
    return "Failed to parse the YAML description of the MCModule.";
  // parse() yields an empty string on success.
  return Parser.parse(YAMLModule);
}
} // end namespace llvm

View File

@ -1,574 +0,0 @@
//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCObjectDisassembler.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCFunction.h"
#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/MC/MCAnalysis/MCObjectSymbolizer.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/StringRefMemoryObject.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
using namespace llvm;
using namespace object;
#define DEBUG_TYPE "mc"
// Store references to the object file, the disassembler and the instruction
// analysis; the symbolizer pointer (MOS) starts out null.
MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
const MCDisassembler &Dis,
const MCInstrAnalysis &MIA)
: Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {}
// Return the effective load address of the first symbol named "main" or
// "_main" whose address can be read, or 0 when the object has no such symbol.
//
// Symbols whose name or address query fails, or whose address is
// UnknownAddressOrSize, are skipped rather than used with an indeterminate
// value.
uint64_t MCObjectDisassembler::getEntrypoint() {
  for (const SymbolRef &Symbol : Obj.symbols()) {
    StringRef Name;
    if (Symbol.getName(Name))
      continue; // Name unavailable.
    if (Name != "main" && Name != "_main")
      continue;

    uint64_t Entrypoint = 0;
    if (Symbol.getAddress(Entrypoint) || Entrypoint == UnknownAddressOrSize)
      continue; // No usable address for this symbol.
    return getEffectiveLoadAddr(Entrypoint);
  }
  return 0;
}
// This implementation reports no static initializer functions: it always
// returns an empty array.
ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
  return ArrayRef<uint64_t>();
}
// This implementation reports no static exit functions: it always returns an
// empty array.
ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
  return ArrayRef<uint64_t>();
}
// Return the memory region to disassemble \p Addr from. \p Addr is currently
// ignored: the fallback region is returned for every address, and the result
// is null when no fallback region is held.
MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
// FIXME: Keep track of object sections.
return FallbackRegion.get();
}
// Translate an address read from the object file into the address used in the
// module. This implementation is the identity.
// NOTE(review): presumably a hook for format-specific subclasses (e.g. the
// MachO disassembler below) - confirm against the header.
uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
return Addr;
}
// Inverse of getEffectiveLoadAddr(): translate a module address back into the
// object file's address. This implementation is the identity.
uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
return Addr;
}
// Allocate a module that has no atoms or functions yet; only its entrypoint is
// filled in, from getEntrypoint(). The caller receives a raw pointer to the
// newly allocated module.
MCModule *MCObjectDisassembler::buildEmptyModule() {
  MCModule *const EmptyModule = new MCModule;
  EmptyModule->Entrypoint = getEntrypoint();
  return EmptyModule;
}
// Build a module for the object file: create the section atoms and, when
// \p withCFG is set, reconstruct the control-flow graph on top of them.
MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
  MCModule *Module = buildEmptyModule();
  buildSectionAtoms(Module);
  if (!withCFG)
    return Module;

  buildCFG(Module);
  return Module;
}
// Create atoms in \p Module for every text or data section of the object.
//
// Data sections become a single data atom holding the section bytes. Text
// sections are disassembled linearly: runs of decodable instructions become
// text atoms (named after the section), and runs of undecodable bytes become
// data atoms.
void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
for (const SectionRef &Section : Obj.sections()) {
// NOTE(review): the error codes returned by the Section queries below are
// ignored, so the output variables may be left unset on failure - confirm.
bool isText;
Section.isText(isText);
bool isData;
Section.isData(isData);
if (!isData && !isText)
continue;
uint64_t StartAddr;
Section.getAddress(StartAddr);
uint64_t SecSize;
Section.getSize(SecSize);
if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
continue;
StartAddr = getEffectiveLoadAddr(StartAddr);
StringRef Contents;
Section.getContents(Contents);
StringRefMemoryObject memoryObject(Contents, StartAddr);
// We don't care about things like non-file-backed sections yet.
if (Contents.size() != SecSize || !SecSize)
continue;
uint64_t EndAddr = StartAddr + SecSize - 1;
StringRef SecName;
Section.getName(SecName);
if (isText) {
// At most one of Text / InvalidData is the atom currently being extended;
// switching between decodable and undecodable bytes starts a new atom.
MCTextAtom *Text = nullptr;
MCDataAtom *InvalidData = nullptr;
uint64_t InstSize;
for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
const uint64_t CurAddr = StartAddr + Index;
MCInst Inst;
if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
nulls())) {
if (!Text) {
// Start with a one-byte range; addInst() grows the atom as needed.
Text = Module->createTextAtom(CurAddr, CurAddr);
Text->setName(SecName);
}
Text->addInst(Inst, InstSize);
InvalidData = nullptr;
} else {
// NOTE(review): with asserts disabled, InstSize == 0 would make this loop
// spin forever - confirm getInstruction() always reports progress.
assert(InstSize && "getInstruction() consumed no bytes");
if (!InvalidData) {
Text = nullptr;
InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
}
// NOTE(review): Index+I is not checked against SecSize here - confirm
// InstSize can never extend past the end of the section.
for (uint64_t I = 0; I < InstSize; ++I)
InvalidData->addData(Contents[Index+I]);
}
}
} else {
// Data section: one atom spanning the whole section.
MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
Data->setName(SecName);
for (uint64_t Index = 0; Index < SecSize; ++Index)
Data->addData(Contents[Index]);
}
}
}
namespace {
struct BBInfo;
typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
// Scratch record for one basic block while the CFG is being reconstructed.
struct BBInfo {
// Text atom backing the block; null while no atom is known for the address.
MCTextAtom *Atom;
// The created basic block; null until one is created or found.
MCBasicBlock *BB;
// CFG edges between scratch records.
BBInfoSetTy Succs;
BBInfoSetTy Preds;
// Successor addresses, used by getBBAt() instead of the pointer sets.
MCObjectDisassembler::AddressSetTy SuccAddrs;
BBInfo() : Atom(nullptr), BB(nullptr) {}
// Add the edge this -> Succ, updating both endpoints.
void addSucc(BBInfo &Succ) {
Succs.insert(&Succ);
Succ.Preds.insert(this);
}
};
}
// Sort \p V in ascending order and drop duplicate addresses, in place.
static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
  std::sort(V.begin(), V.end());
  // std::unique moves the duplicates to the tail; erase that tail.
  const auto FirstDuplicate = std::unique(V.begin(), V.end());
  V.erase(FirstDuplicate, V.end());
}
// Reconstruct functions and basic blocks for \p Module from its existing text
// atoms. The module must not contain any function yet.
//
// Steps:
//  1. Collect split points (function symbols, addresses following a
//     terminator, branch targets) and call targets (function symbols, atom
//     starts, targets of calls).
//  2. Split the text atoms at every split point that falls inside one.
//  3. Compute successor/predecessor edges between the resulting atoms.
//  4. For every call target that starts an atom, create a function and a basic
//     block for each atom reachable through those edges (in either direction).
void MCObjectDisassembler::buildCFG(MCModule *Module) {
  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
  BBInfoByAddrTy BBInfos;
  AddressSetTy Splits;
  AddressSetTy Calls;

  // Function symbols are both call targets and block boundaries.
  for (const SymbolRef &Symbol : Obj.symbols()) {
    SymbolRef::Type SymType;
    Symbol.getType(SymType);
    if (SymType == SymbolRef::ST_Function) {
      uint64_t SymAddr;
      Symbol.getAddress(SymAddr);
      SymAddr = getEffectiveLoadAddr(SymAddr);
      Calls.push_back(SymAddr);
      Splits.push_back(SymAddr);
    }
  }

  assert(Module->func_begin() == Module->func_end()
         && "Module already has a CFG!");

  // First, determine the basic block boundaries and call targets.
  for (MCModule::atom_iterator AI = Module->atom_begin(),
                               AE = Module->atom_end();
       AI != AE; ++AI) {
    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
    if (!TA) continue;
    Calls.push_back(TA->getBeginAddr());
    BBInfos[TA->getBeginAddr()].Atom = TA;
    for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
         II != IE; ++II) {
      if (MIA.isTerminator(II->Inst))
        Splits.push_back(II->Address + II->Size);
      uint64_t Target;
      if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
        if (MIA.isCall(II->Inst))
          Calls.push_back(Target);
        Splits.push_back(Target);
      }
    }
  }

  RemoveDupsFromAddressVector(Splits);
  RemoveDupsFromAddressVector(Calls);

  // Split text atoms into basic block atoms.
  for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
       SI != SE; ++SI) {
    MCAtom *A = Module->findAtomContaining(*SI);
    if (!A) continue;
    // A split address (symbol or branch target) can land inside a data atom;
    // there is nothing to split there, so skip it instead of cast<>-ing.
    MCTextAtom *TA = dyn_cast<MCTextAtom>(A);
    if (!TA) continue;
    if (TA->getBeginAddr() == *SI)
      continue;
    MCTextAtom *NewAtom = TA->split(*SI);
    BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
    // Name the new atom "<name up to the last ':'>:<hex split address>".
    StringRef BBName = TA->getName();
    BBName = BBName.substr(0, BBName.find_last_of(':'));
    NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
  }

  // Compute succs/preds.
  for (MCModule::atom_iterator AI = Module->atom_begin(),
                               AE = Module->atom_end();
       AI != AE; ++AI) {
    MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
    if (!TA) continue;
    BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
    // Only the last instruction of an atom decides its outgoing edges.
    const MCDecodedInst &LI = TA->back();
    if (MIA.isBranch(LI.Inst)) {
      uint64_t Target;
      if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
        CurBB.addSucc(BBInfos[Target]);
      if (MIA.isConditionalBranch(LI.Inst))
        CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
    } else if (!MIA.isTerminator(LI.Inst))
      CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
  }

  // Create functions and basic blocks.
  for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
       CI != CE; ++CI) {
    BBInfo &BBI = BBInfos[*CI];
    // Call targets that do not start an atom cannot be turned into functions.
    if (!BBI.Atom) continue;

    MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());

    // Create MCBBs.
    SmallSetVector<BBInfo*, 16> Worklist;
    Worklist.insert(&BBI);
    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
      BBInfo *WorkBBI = Worklist[wi];
      if (!WorkBBI->Atom)
        continue;
      WorkBBI->BB = &MCFN.createBlock(*WorkBBI->Atom);
      // Add all predecessors and successors to the worklist.
      for (BBInfoSetTy::iterator SI = WorkBBI->Succs.begin(),
                                 SE = WorkBBI->Succs.end();
           SI != SE; ++SI)
        Worklist.insert(*SI);
      for (BBInfoSetTy::iterator PI = WorkBBI->Preds.begin(),
                                 PE = WorkBBI->Preds.end();
           PI != PE; ++PI)
        Worklist.insert(*PI);
    }

    // Set preds/succs.
    for (size_t wi = 0; wi < Worklist.size(); ++wi) {
      BBInfo *WorkBBI = Worklist[wi];
      MCBasicBlock *MCBB = WorkBBI->BB;
      if (!MCBB)
        continue;
      for (BBInfoSetTy::iterator SI = WorkBBI->Succs.begin(),
                                 SE = WorkBBI->Succs.end();
           SI != SE; ++SI)
        if ((*SI)->BB)
          MCBB->addSuccessor((*SI)->BB);
      for (BBInfoSetTy::iterator PI = WorkBBI->Preds.begin(),
                                 PE = WorkBBI->Preds.end();
           PI != PE; ++PI)
        if ((*PI)->BB)
          MCBB->addPredecessor((*PI)->BB);
    }
  }
}
// Basic idea of the disassembly + discovery:
//
// start with the wanted address, insert it in the worklist
// while worklist not empty, take next address in the worklist:
// - check if atom exists there
// - if middle of atom:
// - split basic blocks referencing the atom
// - look for an already encountered BBInfo (using a map<atom, bbinfo>)
// - if there is, split it (new one, fallthrough, move succs, etc..)
// - if start of atom: nothing else to do
// - if no atom: create new atom and new bbinfo
// - look at the last instruction in the atom, add succs to worklist
// for all elements in the worklist:
// - create basic block, update preds/succs, etc..
//
// Returns the basic block of \p MCFN that starts at \p BBBeginAddr, creating
// it and every block discovered from it as needed. Targets of calls seen while
// disassembling are appended to \p CallTargets; branch targets that the
// symbolizer resolves to an external function are appended to both
// \p TailCallTargets and \p CallTargets.
MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
uint64_t BBBeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets) {
typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
BBInfoByAddrTy BBInfos;
AddrWorklistTy Worklist;
Worklist.insert(BBBeginAddr);
// The worklist grows while it is being walked, so its size is re-read on
// every iteration.
for (size_t wi = 0; wi < Worklist.size(); ++wi) {
const uint64_t BeginAddr = Worklist[wi];
BBInfo *BBI = &BBInfos[BeginAddr];
MCTextAtom *&TA = BBI->Atom;
assert(!TA && "Discovered basic block already has an associated atom!");
// Look for an atom at BeginAddr.
if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
// FIXME: We don't care about mixed atoms, see above.
// NOTE(review): cast<> asserts when the atom found is a data atom.
TA = cast<MCTextAtom>(A);
// The found atom doesn't begin at BeginAddr, we have to split it.
if (TA->getBeginAddr() != BeginAddr) {
// FIXME: Handle overlapping atoms: middle-starting instructions, etc..
MCTextAtom *NewTA = TA->split(BeginAddr);
// Look for an already encountered basic block that needs splitting
BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
if (It != BBInfos.end() && It->second.Atom) {
// The new block takes over the old block's successors; the old block
// now only falls through to the new one.
BBI->SuccAddrs = It->second.SuccAddrs;
It->second.SuccAddrs.clear();
It->second.SuccAddrs.push_back(BeginAddr);
}
TA = NewTA;
}
BBI->Atom = TA;
} else {
// If we didn't find an atom, then we have to disassemble to create one!
MemoryObject *Region = getRegionFor(BeginAddr);
if (!Region)
llvm_unreachable(("Couldn't find suitable region for disassembly at " +
utostr(BeginAddr)).c_str());
uint64_t InstSize;
uint64_t EndAddr = Region->getBase() + Region->getExtent();
// We want to stop before the next atom and have a fallthrough to it.
// NOTE(review): cast_or_null<> asserts when the next atom is a data atom.
if (MCTextAtom *NextAtom =
cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
MCInst Inst;
if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
nulls())) {
if (!TA)
TA = Module->createTextAtom(Addr, Addr);
TA->addInst(Inst, InstSize);
} else {
// We don't care about splitting mixed atoms either.
llvm_unreachable("Couldn't disassemble instruction in atom.");
}
uint64_t BranchTarget;
if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
if (MIA.isCall(Inst))
CallTargets.push_back(BranchTarget);
}
// A terminator ends the basic block, and therefore the atom.
if (MIA.isTerminator(Inst))
break;
}
BBI->Atom = TA;
}
assert(TA && "Couldn't disassemble atom, none was created!");
assert(TA->begin() != TA->end() && "Empty atom!");
MemoryObject *Region = getRegionFor(TA->getBeginAddr());
assert(Region && "Couldn't find region for already disassembled code!");
uint64_t EndRegion = Region->getBase() + Region->getExtent();
// Now we have a basic block atom, add successors.
// Add the fallthrough block.
if ((MIA.isConditionalBranch(TA->back().Inst) ||
!MIA.isTerminator(TA->back().Inst)) &&
(TA->getEndAddr() + 1 < EndRegion)) {
BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
Worklist.insert(TA->getEndAddr() + 1);
}
// If the terminator is a branch, add the target block.
if (MIA.isBranch(TA->back().Inst)) {
uint64_t BranchTarget;
if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
TA->back().Size, BranchTarget)) {
StringRef ExtFnName;
if (MOS)
ExtFnName =
MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
if (!ExtFnName.empty()) {
// A branch to an external function is recorded as a tail call and is
// not followed as a successor block.
TailCallTargets.push_back(BranchTarget);
CallTargets.push_back(BranchTarget);
} else {
BBI->SuccAddrs.push_back(BranchTarget);
Worklist.insert(BranchTarget);
}
}
}
}
// Second pass: make sure every discovered address has a basic block.
for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
const uint64_t BeginAddr = Worklist[wi];
BBInfo *BBI = &BBInfos[BeginAddr];
assert(BBI->Atom && "Found a basic block without an associated atom!");
// Look for a basic block at BeginAddr.
BBI->BB = MCFN->find(BeginAddr);
if (BBI->BB) {
// FIXME: check that the succs/preds are the same
continue;
}
// If there was none, we have to create one from the atom.
BBI->BB = &MCFN->createBlock(*BBI->Atom);
}
// Third pass: connect the blocks using the recorded successor addresses.
for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
const uint64_t BeginAddr = Worklist[wi];
BBInfo *BBI = &BBInfos[BeginAddr];
MCBasicBlock *BB = BBI->BB;
RemoveDupsFromAddressVector(BBI->SuccAddrs);
for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
SE = BBI->SuccAddrs.end();
SI != SE; ++SI) {
MCBasicBlock *Succ = BBInfos[*SI].BB;
BB->addSuccessor(Succ);
Succ->addPredecessor(BB);
}
}
assert(BBInfos[Worklist[0]].BB &&
"No basic block created at requested address?");
return BBInfos[Worklist[0]].BB;
}
/// Return the MCFunction that starts at \p BeginAddr, creating it if needed.
///
/// Resolution order:
///   1. if a symbolizer (MOS) is attached and reports an external function at
///      the original load address, a function with that name is created;
///   2. otherwise an existing, non-empty function of \p Module whose entry
///      block begins at \p BeginAddr is reused;
///   3. otherwise an unnamed function is created and populated via getBBAt().
///
/// \p CallTargets and \p TailCallTargets are only forwarded to getBBAt().
MCFunction *
MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
                                     AddressSetTy &CallTargets,
                                     AddressSetTy &TailCallTargets) {
  // Step 1: external functions are identified by name through the symbolizer.
  StringRef ExternalName;
  if (MOS)
    ExternalName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
  if (!ExternalName.empty())
    return Module->createFunction(ExternalName);

  // Step 2: linear scan over the functions already known to the module.
  // FIXME: MCModule should provide a findFunctionByAddr()
  MCModule::func_iterator Cur = Module->func_begin();
  MCModule::func_iterator Last = Module->func_end();
  for (; Cur != Last; ++Cur) {
    if (!(*Cur)->empty() &&
        (*Cur)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
      return Cur->get();
  }

  // Step 3: nothing matched, build a fresh function rooted at BeginAddr.
  MCFunction *Created = Module->createFunction("");
  getBBAt(Module, Created, BeginAddr, CallTargets, TailCallTargets);
  return Created;
}
// MachO MCObjectDisassembler implementation.

/// Build a Mach-O aware object disassembler.
///
/// Besides storing the slide and header load address, the constructor walks
/// the sections of \p MOOF once and captures the contents of the
/// "__mod_init_func" and "__mod_exit_func" sections into ModInitContents and
/// ModExitContents respectively.
MCMachOObjectDisassembler::MCMachOObjectDisassembler(
    const MachOObjectFile &MOOF, const MCDisassembler &Dis,
    const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
    uint64_t HeaderLoadAddress)
    : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
      VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
  for (const SectionRef &Sec : MOOF.sections()) {
    StringRef SecName;
    Sec.getName(SecName);

    // FIXME: We should use the S_ section type instead of the name.
    if (SecName == "__mod_init_func") {
      DEBUG(dbgs() << "Found __mod_init_func section!\n");
      Sec.getContents(ModInitContents);
      continue;
    }
    if (SecName == "__mod_exit_func") {
      DEBUG(dbgs() << "Found __mod_exit_func section!\n");
      Sec.getContents(ModExitContents);
    }
  }
}
// FIXME: Only do the translations for addresses actually inside the object.

/// Translate an address from the object file into the slid address by adding
/// VMAddrSlide.
uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
  const uint64_t Slid = Addr + VMAddrSlide;
  return Slid;
}
/// Inverse of getEffectiveLoadAddr(): subtract VMAddrSlide from a slid
/// address to recover the address as recorded in the object file.
uint64_t
MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
  const uint64_t Unslid = EffectiveAddr - VMAddrSlide;
  return Unslid;
}
uint64_t MCMachOObjectDisassembler::getEntrypoint() {
uint64_t EntryFileOffset = 0;
// Look for LC_MAIN.
{
uint32_t LoadCommandCount = MOOF.getHeader().ncmds;
MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
for (unsigned I = 0;; ++I) {
if (Load.C.cmd == MachO::LC_MAIN) {
EntryFileOffset =
((const MachO::entry_point_command *)Load.Ptr)->entryoff;
break;
}
if (I == LoadCommandCount - 1)
break;
else
Load = MOOF.getNextLoadCommandInfo(Load);
}
}
// If we didn't find anything, default to the common implementation.
// FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
if (EntryFileOffset)
return MCObjectDisassembler::getEntrypoint();
return EntryFileOffset + HeaderLoadAddress;
}
/// View the captured "__mod_init_func" contents as an array of 64-bit
/// entries. Trailing bytes that do not form a whole 8-byte entry are ignored.
ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
  // FIXME: We only handle 64bit mach-o
  assert(MOOF.is64Bit());

  const size_t BytesPerEntry = 8;
  const uint64_t *FirstEntry =
      reinterpret_cast<const uint64_t *>(ModInitContents.data());
  const size_t NumEntries = ModInitContents.size() / BytesPerEntry;
  return makeArrayRef(FirstEntry, NumEntries);
}
/// View the captured "__mod_exit_func" contents as an array of 64-bit
/// entries. Trailing bytes that do not form a whole 8-byte entry are ignored.
ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
  // FIXME: We only handle 64bit mach-o
  assert(MOOF.is64Bit());

  const size_t BytesPerEntry = 8;
  const uint64_t *FirstEntry =
      reinterpret_cast<const uint64_t *>(ModExitContents.data());
  const size_t NumEntries = ModExitContents.size() / BytesPerEntry;
  return makeArrayRef(FirstEntry, NumEntries);
}

View File

@ -1,268 +0,0 @@
//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAnalysis/MCObjectSymbolizer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRelocationInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
using namespace object;
//===- MCMachObjectSymbolizer ---------------------------------------------===//
namespace {
// Mach-O specialization of MCObjectSymbolizer. It overrides the lookup of
// external functions (implemented via the "__stubs" section) and the
// generation of PC-relative load comments.
class MCMachObjectSymbolizer : public MCObjectSymbolizer {
// The Mach-O object being symbolized; not owned by this class as far as this
// declaration shows (plain pointer, no deleter visible).
const MachOObjectFile *MOOF;
// __TEXT;__stubs support.
// Address of the "__stubs" section, as reported by SectionRef::getAddress.
uint64_t StubsStart;
// Number of stub entries: section size divided by StubSize.
uint64_t StubsCount;
// Size of one stub entry, read from the section header's reserved2 field.
// Zero means no "__stubs" section was seen.
uint64_t StubSize;
// Value of the section header's reserved1 field for "__stubs".
uint64_t StubsIndSymIndex;
public:
// Scans the sections of MOOF for "__stubs" and fills in the stub fields.
MCMachObjectSymbolizer(MCContext &Ctx,
std::unique_ptr<MCRelocationInfo> RelInfo,
const MachOObjectFile *MOOF);
// Returns the name of the external function whose stub covers Addr, or an
// empty StringRef when Addr is not inside the stubs area.
StringRef findExternalFunctionAt(uint64_t Addr) override;
// Emits a " ## literal pool for: ..." comment on cStream when Value resolves
// to an address inside a "__cstring" section.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
uint64_t Address) override;
};
} // End unnamed namespace
/// Construct the Mach-O symbolizer and locate the "__stubs" section.
///
/// All stub bookkeeping starts at zero. For every section named "__stubs" the
/// header's reserved1/reserved2 fields are copied into StubsIndSymIndex and
/// StubSize, and the section address and entry count are recorded. If several
/// such sections exist, the last one visited wins.
MCMachObjectSymbolizer::MCMachObjectSymbolizer(
    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
    const MachOObjectFile *MOOF)
    : MCObjectSymbolizer(Ctx, std::move(RelInfo), MOOF), MOOF(MOOF),
      StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) {
  for (const SectionRef &Sec : MOOF->sections()) {
    StringRef SecName;
    Sec.getName(SecName);
    if (!(SecName == "__stubs"))
      continue;

    // The section header layout differs between 32- and 64-bit objects, but
    // the two reserved fields carry the same meaning in both.
    if (MOOF->is64Bit()) {
      MachO::section_64 Hdr = MOOF->getSection64(Sec.getRawDataRefImpl());
      StubsIndSymIndex = Hdr.reserved1;
      StubSize = Hdr.reserved2;
    } else {
      MachO::section Hdr = MOOF->getSection(Sec.getRawDataRefImpl());
      StubsIndSymIndex = Hdr.reserved1;
      StubSize = Hdr.reserved2;
    }
    assert(StubSize && "Mach-O stub entry size can't be zero!");

    Sec.getAddress(StubsStart);
    // getSize() yields the byte size; convert it into a number of stubs.
    Sec.getSize(StubsCount);
    StubsCount = StubsCount / StubSize;
  }
}
/// Map an address inside the "__stubs" section to the name of the external
/// function the stub forwards to.
///
/// \param Addr address to look up.
/// \returns the symbol name without its leading '_', or an empty StringRef
///          when no stubs section was found, \p Addr lies outside of it, or
///          the indirect symbol table entry does not name a symbol in the
///          symbol table.
StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
  // FIXME: also, this can all be done at the very beginning, by iterating over
  // all stubs and creating the calls to outside functions. Is it worth it
  // though?
  // Reject addresses below the section explicitly: the unsigned subtraction
  // below would otherwise wrap around.
  if (!StubSize || Addr < StubsStart)
    return StringRef();
  uint64_t StubIdx = (Addr - StubsStart) / StubSize;
  if (StubIdx >= StubsCount)
    return StringRef();

  // The section's reserved1 field (StubsIndSymIndex) is the index of the
  // section's first entry in the indirect symbol table; the entry for a given
  // stub is found at that base plus the stub index.
  uint32_t SymtabIdx = MOOF->getIndirectSymbolTableEntry(
      MOOF->getDysymtabLoadCommand(), StubsIndSymIndex + StubIdx);

  // Walk to the SymtabIdx-th symbol, but never past the end of the table:
  // the index comes from the file and may be out of range (or be a special
  // marker value rather than a real index).
  symbol_iterator SI = MOOF->symbol_begin();
  const symbol_iterator SE = MOOF->symbol_end();
  for (uint32_t i = 0; i != SymtabIdx && SI != SE; ++i)
    ++SI;
  if (SI == SE)
    return StringRef();

  StringRef SymName;
  SI->getName(SymName);
  if (SymName.empty())
    return StringRef();
  assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!");
  return SymName.substr(1);
}
/// Annotate a PC-relative load with the C string it refers to, if any.
///
/// When a relocation exists at \p Address, \p Value is replaced by the
/// absolute value of the relocation expression (and nothing is emitted if
/// that expression cannot be built or evaluated). If the resulting address
/// falls inside a section named "__cstring", the NUL-terminated string found
/// there is appended to \p cStream after " ## literal pool for: ".
void MCMachObjectSymbolizer::
tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
                                uint64_t Address) {
  const RelocationRef *Rel = findRelocationAt(Address);
  if (Rel) {
    const MCExpr *RelExpr = RelInfo->createExprForRelocation(*Rel);
    if (!RelExpr)
      return;
    // EvaluateAsAbsolute writes the resolved value back into Value.
    if (!RelExpr->EvaluateAsAbsolute(Value))
      return;
  }

  const uint64_t Target = Value;
  const SectionRef *Sec = findSectionContaining(Target);
  if (!Sec)
    return;

  StringRef SecName;
  Sec->getName(SecName);
  uint64_t SecStart;
  Sec->getAddress(SecStart);
  if (!(SecName == "__cstring"))
    return;

  StringRef Bytes;
  Sec->getContents(Bytes);
  // Skip to the referenced string and cut it at its terminating NUL.
  StringRef Tail = Bytes.substr(Target - SecStart);
  cStream << " ## literal pool for: "
          << Tail.substr(0, Tail.find_first_of(0));
}
//===- MCObjectSymbolizer -------------------------------------------------===//

/// Construct a generic object symbolizer for \p Obj. The sorted section list
/// and the address-to-relocation map start out empty; both are filled on
/// first use by findSectionContaining() and findRelocationAt().
MCObjectSymbolizer::MCObjectSymbolizer(
    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
    const ObjectFile *Obj)
    : MCSymbolizer(Ctx, std::move(RelInfo)),
      Obj(Obj),
      SortedSections(),
      AddrToReloc() {
}
/// Try to replace an immediate operand of \p MI with a symbolic expression.
///
/// Attempts, in order:
///   1. for branches, an external function reported by
///      findExternalFunctionAt() for \p Value;
///   2. the relocation located at \p Address + \p Offset (if a relocation is
///      present but yields no expression, symbolization is abandoned);
///   3. for branches, a function symbol of the object that starts at, or
///      contains, \p Value, expressed as symbol or symbol+offset.
///
/// \returns true when an expression operand was appended to \p MI.
bool MCObjectSymbolizer::
tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
                         int64_t Value, uint64_t Address, bool IsBranch,
                         uint64_t Offset, uint64_t InstSize) {
  if (IsBranch) {
    StringRef Callee = findExternalFunctionAt((uint64_t)Value);
    if (!Callee.empty()) {
      const MCExpr *CalleeRef =
          MCSymbolRefExpr::Create(Ctx.GetOrCreateSymbol(Callee), Ctx);
      MI.addOperand(MCOperand::CreateExpr(CalleeRef));
      return true;
    }
  }

  if (const RelocationRef *Rel = findRelocationAt(Address + Offset)) {
    const MCExpr *RelExpr = RelInfo->createExprForRelocation(*Rel);
    // Only try to create a symbol+offset expression if there is no relocation.
    if (!RelExpr)
      return false;
    MI.addOperand(MCOperand::CreateExpr(RelExpr));
    return true;
  }

  // Interpret Value as a branch target.
  if (!IsBranch)
    return false;
  const uint64_t Target = Value;

  // FIXME: map instead of looping each time?
  for (const SymbolRef &Candidate : Obj->symbols()) {
    uint64_t Start;
    Candidate.getAddress(Start);
    uint64_t Length;
    Candidate.getSize(Length);
    StringRef Name;
    Candidate.getName(Name);
    SymbolRef::Type Kind;
    Candidate.getType(Kind);

    // Only named function symbols with a known extent are usable.
    const bool Usable = Start != UnknownAddressOrSize &&
                        Length != UnknownAddressOrSize && !Name.empty() &&
                        Kind == SymbolRef::ST_Function;
    if (!Usable)
      continue;

    const bool ExactHit = Start == Target;
    const bool Inside = Start <= Target && Start + Length > Target;
    if (!ExactHit && !Inside)
      continue;

    const MCExpr *Expr =
        MCSymbolRefExpr::Create(Ctx.GetOrCreateSymbol(Name), Ctx);
    if (!ExactHit)
      Expr = MCBinaryExpr::CreateAdd(
          Expr, MCConstantExpr::Create(Target - Start, Ctx), Ctx);
    MI.addOperand(MCOperand::CreateExpr(Expr));
    return true;
  }
  return false;
}
// Generic implementation: intentionally does nothing, so no comment is ever
// written to cStream. The Mach-O symbolizer in this file overrides it.
void MCObjectSymbolizer::
tryAddingPcLoadReferenceComment(raw_ostream &cStream,
int64_t Value, uint64_t Address) {
}
/// Generic implementation: no address is ever recognized as an external
/// function, so the result is always a default-constructed (empty) StringRef.
StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
  StringRef NoName;
  return NoName;
}
/// Factory: allocate the symbolizer matching the format of \p Obj.
///
/// Mach-O objects get an MCMachObjectSymbolizer, every other format gets the
/// generic MCObjectSymbolizer. The result is allocated with new and returned
/// as a raw pointer.
MCObjectSymbolizer *MCObjectSymbolizer::createObjectSymbolizer(
    MCContext &Ctx, std::unique_ptr<MCRelocationInfo> RelInfo,
    const ObjectFile *Obj) {
  const MachOObjectFile *AsMachO = dyn_cast<MachOObjectFile>(Obj);
  if (!AsMachO)
    return new MCObjectSymbolizer(Ctx, std::move(RelInfo), Obj);
  return new MCMachObjectSymbolizer(Ctx, std::move(RelInfo), AsMachO);
}
// SortedSections implementation.

/// Ordering predicate for std::lower_bound over the sorted section list:
/// true when section \p S starts strictly below \p Addr.
static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) {
  uint64_t Start;
  S.getAddress(Start);
  return Addr > Start;
}
/// Find the section whose address range [start, start + size) contains
/// \p Addr.
///
/// The sorted section list is built on first use.
///
/// \returns a pointer to the matching entry of SortedSections, or nullptr
///          when no listed section contains \p Addr.
const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
  if (SortedSections.empty())
    buildSectionList();

  const SortedSectionList::iterator BeginIt = SortedSections.begin(),
                                    EndIt = SortedSections.end();
  // lower_bound yields the first section starting at or after Addr.
  SortedSectionList::iterator It =
      std::lower_bound(BeginIt, EndIt, Addr, SectionStartsBefore);

  uint64_t SAddr = 0;
  if (It != EndIt)
    It->getAddress(SAddr);

  // Unless that section starts exactly at Addr, it lies entirely above Addr
  // and the only possible container is the section just before it.
  if (It == EndIt || SAddr != Addr) {
    if (It == BeginIt)
      return nullptr;
    --It;
    It->getAddress(SAddr);
  }

  uint64_t SSize = 0;
  It->getSize(SSize);
  // Compare offsets rather than end addresses so that a section reaching the
  // top of the address space cannot overflow the check.
  if (Addr < SAddr || Addr - SAddr >= SSize)
    return nullptr;
  return &*It;
}
/// Look up the relocation recorded for exactly \p Addr.
///
/// The address-to-relocation map is (re)built whenever it is empty.
///
/// \returns a pointer into the map, or nullptr if no relocation is recorded
///          at \p Addr.
const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
  if (AddrToReloc.empty())
    buildRelocationByAddrMap();

  AddrToRelocMap::const_iterator Found = AddrToReloc.find(Addr);
  if (Found != AddrToReloc.end())
    return &Found->second;
  return nullptr;
}
/// Populate SortedSections with every section of the object that is required
/// for execution, kept ordered by start address.
///
/// Each section is inserted at its lower_bound position. If the section
/// already stored at that position starts before the new section ends, the
/// input is considered malformed and llvm_unreachable is hit.
void MCObjectSymbolizer::buildSectionList() {
  for (const SectionRef &Sec : Obj->sections()) {
    bool NeededAtRuntime;
    Sec.isRequiredForExecution(NeededAtRuntime);
    if (!NeededAtRuntime)
      continue;

    uint64_t Start;
    Sec.getAddress(Start);
    uint64_t Length;
    Sec.getSize(Length);

    SortedSectionList::iterator Pos = std::lower_bound(
        SortedSections.begin(), SortedSections.end(), Start,
        SectionStartsBefore);

    // Only the successor is checked for overlap with the new section.
    if (Pos != SortedSections.end()) {
      uint64_t NextStart;
      Pos->getAddress(NextStart);
      if (NextStart < Start + Length)
        llvm_unreachable("Inserting overlapping sections");
    }
    SortedSections.insert(Pos, Sec);
  }
}
/// Fill AddrToReloc from the relocations of every section of the object.
/// When several relocations share an address, the first one visited is kept.
void MCObjectSymbolizer::buildRelocationByAddrMap() {
  for (const SectionRef &Sec : Obj->sections()) {
    for (const RelocationRef &Rel : Sec.relocations()) {
      uint64_t Where;
      Rel.getAddress(Where);

      // At a specific address, only keep the first relocation.
      const bool AlreadyMapped = AddrToReloc.find(Where) != AddrToReloc.end();
      if (AlreadyMapped)
        continue;
      AddrToReloc[Where] = Rel;
    }
  }
}

View File

@ -1,14 +0,0 @@
##===- lib/MC/MCAnalysis/Makefile --------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
LIBRARYNAME = LLVMMCAnalysis
BUILD_ARCHIVE := 1
include $(LEVEL)/Makefile.common

View File

@ -10,7 +10,7 @@
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
PARALLEL_DIRS := MCAnalysis MCParser MCDisassembler
PARALLEL_DIRS := MCParser MCDisassembler
include $(LEVEL)/Makefile.common

View File

@ -1,58 +0,0 @@
# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
# REQUIRES: shell
#
# Generated from:
# main:
# .LBL0_1:
# movq 8(%rsi), %rax
# <invalid opcode: 06>
# nop
!ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_REL
Machine: EM_X86_64
Sections:
- Name: .text
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
Content: "488B46080690"
## 0000000000000000 <main>:
#CFG: Atoms:
#CFG: - StartAddress: 0x0000000000000000
#CFG: Size: 4
#CFG: Type: Text
## 0: 48 8b 46 08 mov 0x8(%rsi),%rax
#CFG: - Inst: MOV64rm
#CFG: Size: 4
#CFG: Ops: [ RRAX, RRSI, I1, R, I8, R ]
#CFG: - StartAddress: 0x0000000000000004
#CFG: Size: 1
#CFG: Type: Data
## 4: 06 (bad)
#CFG: Content: '06'
#CFG: - StartAddress: 0x0000000000000005
#CFG: Size: 1
#CFG: Type: Text
## 5: 90 nop
#CFG: - Inst: NOOP
#CFG: Size: 1
#CFG: Ops: [ ]
Symbols:
Global:
- Name: main
Type: STT_FUNC
Section: .text
Value: 0x0
Size: 6

View File

@ -1,39 +0,0 @@
# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
# REQUIRES: shell
#
# Generated from:
# main:
# .LBL0_1:
# jmp .LBL0_1
#
!ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_REL
Machine: EM_X86_64
Sections:
- Name: .text
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
Content: "EBFE"
## 0000000000000000 <main>:
#CFG: Atoms:
#CFG: - StartAddress: 0x0000000000000000
#CFG: Size: 2
## 0: eb fe jmp $-2
#CFG: - Inst: JMP_1
#CFG: Size: 2
#CFG: Ops: [ I-2 ]
Symbols:
Global:
- Name: main
Type: STT_FUNC
Section: .text
Value: 0x0
Size: 2

View File

@ -1,86 +0,0 @@
# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
# REQUIRES: shell
#
# Generated from:
# main:
# movl $48, %eax
# cmpl $3, %edi
# jl .LBB0_2
# movq 8(%rsi), %rax
# movsbl (%rax), %eax
# .LBB0_2:
# ret
#
!ELF
FileHeader:
Class: ELFCLASS64
Data: ELFDATA2LSB
Type: ET_REL
Machine: EM_X86_64
Sections:
- Name: .text
Type: SHT_PROGBITS
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
Content: "B83000000083FF037C07488B46080FBE00C3"
## 0000000000000000 <main>:
#CFG: Atoms:
#CFG: - StartAddress: 0x0000000000000000
#CFG: Size: 10
## 0: b8 30 00 00 00 mov $0x30,%eax
#CFG: - Inst: MOV32ri
#CFG: Size: 5
#CFG: Ops: [ REAX, I48 ]
## 5: 83 ff 03 cmp $0x3,%edi
#CFG: - Inst: CMP32ri8
#CFG: Size: 3
#CFG: Ops: [ REDI, I3 ]
## 8: 7c 07 jl 11 <main+0x11>
#CFG: - Inst: JL_1
#CFG: Size: 2
#CFG: Ops: [ I7 ]
#CFG: - StartAddress: 0x000000000000000A
#CFG: Size: 7
## a: 48 8b 46 08 mov 0x8(%rsi),%rax
#CFG: - Inst: MOV64rm
#CFG: Size: 4
#CFG: Ops: [ RRAX, RRSI, I1, R, I8, R ]
## e: 0f be 00 movsbl (%rax),%eax
#CFG: - Inst: MOVSX32rm8
#CFG: Size: 3
#CFG: Ops: [ REAX, RRAX, I1, R, I0, R ]
#CFG: - StartAddress: 0x0000000000000011
#CFG: Size: 1
## 11: c3 retq
#CFG: - Inst: RET
#CFG: Size: 1
#CFG: Ops: [ ]
Symbols:
Global:
- Name: main
Type: STT_FUNC
Section: .text
Value: 0x0
Size: 18
#CFG: Functions:
#CFG: BasicBlocks:
#CFG: - Address: 0x0000000000000000
#CFG: Preds: [ ]
#CFG: Succs: [ 0x0000000000000011, 0x000000000000000A ]
#CFG: - Address: 0x0000000000000011
#CFG: Preds: [ 0x0000000000000000, 0x000000000000000A ]
#CFG: Succs: [ ]
#CFG: - Address: 0x000000000000000A
#CFG: Preds: [ 0x0000000000000000 ]
#CFG: Succs: [ 0x0000000000000011 ]

View File

@ -1,68 +0,0 @@
RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF-x86-64
RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.macho-x86-64 \
RUN: | FileCheck %s -check-prefix MACHO-x86-64
# Generate this using:
# ld trivial-object-test.macho-x86-64 -undefined dynamic_lookup
RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-executable-test.macho-x86-64 \
RUN: | FileCheck %s -check-prefix MACHO-STUBS-x86-64
ELF-x86-64: file format ELF64-x86-64
ELF-x86-64: Disassembly of section .text:
ELF-x86-64: main:
ELF-x86-64: 0: 48 83 ec 08 subq $8, %rsp
ELF-x86-64: 4: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp)
ELF-x86-64: c: bf 00 00 00 00 movl $.rodata.str1.1, %edi
ELF-x86-64: 11: e8 00 00 00 00 callq puts-4
ELF-x86-64: 16: 30 c0 xorb %al, %al
ELF-x86-64: 18: e8 00 00 00 00 callq SomeOtherFunction-4
ELF-x86-64: 1d: 8b 44 24 04 movl 4(%rsp), %eax
ELF-x86-64: 21: 48 83 c4 08 addq $8, %rsp
ELF-x86-64: 25: c3 ret
MACHO-x86-64: file format Mach-O 64-bit x86-64
MACHO-x86-64: Disassembly of section __TEXT,__text:
MACHO-x86-64: _main:
MACHO-x86-64: 0: 48 83 ec 08 subq $8, %rsp
MACHO-x86-64: 4: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp)
MACHO-x86-64: c: 48 8d 3d 00 00 00 00 leaq L_.str(%rip), %rdi ## literal pool for: Hello World!
MACHO-x86-64: 13: e8 00 00 00 00 callq _puts
MACHO-x86-64: 18: 30 c0 xorb %al, %al
MACHO-x86-64: 1a: e8 00 00 00 00 callq _SomeOtherFunction
MACHO-x86-64: 1f: 8b 44 24 04 movl 4(%rsp), %eax
MACHO-x86-64: 23: 48 83 c4 08 addq $8, %rsp
MACHO-x86-64: 27: c3 ret
MACHO-STUBS-x86-64: file format Mach-O 64-bit x86-64
MACHO-STUBS-x86-64: Disassembly of section __TEXT,__text:
MACHO-STUBS-x86-64: _main:
MACHO-STUBS-x86-64: 1f90: 48 83 ec 08 subq $8, %rsp
MACHO-STUBS-x86-64: 1f94: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp)
MACHO-STUBS-x86-64: 1f9c: 48 8d 3d 45 00 00 00 leaq 69(%rip), %rdi ## literal pool for: Hello World!
MACHO-STUBS-x86-64: 1fa3: e8 16 00 00 00 callq puts
MACHO-STUBS-x86-64: 1fa8: 30 c0 xorb %al, %al
MACHO-STUBS-x86-64: 1faa: e8 09 00 00 00 callq SomeOtherFunction
MACHO-STUBS-x86-64: 1faf: 8b 44 24 04 movl 4(%rsp), %eax
MACHO-STUBS-x86-64: 1fb3: 48 83 c4 08 addq $8, %rsp
MACHO-STUBS-x86-64: 1fb7: c3 ret
RUN: llvm-objdump -d -symbolize %p/../Inputs/relocation-relocatable.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF-i386-REL
ELF-i386-REL: Disassembly of section .text:
ELF-i386-REL-NEXT: f:
ELF-i386-REL-NEXT: 0: e9 fc ff ff ff jmp h
ELF-i386-REL: g:
ELF-i386-REL-NEXT: 5: e9 fc ff ff ff jmp f
RUN: llvm-objdump -d -symbolize %p/../Inputs/relocation-dynamic.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF-i386-DYN
ELF-i386-DYN: Disassembly of section .text:
ELF-i386-DYN-NEXT: f:
ELF-i386-DYN-NEXT: 1a4: e9 fc ff ff ff jmp h
ELF-i386-DYN: g:
ELF-i386-DYN-NEXT: 1a9: e9 fc ff ff ff jmp f

View File

@ -2,7 +2,6 @@ set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
DebugInfo
MC
MCAnalysis
MCDisassembler
Object
Support

View File

@ -19,4 +19,4 @@
type = Tool
name = llvm-objdump
parent = Tools
required_libraries = DebugInfo MC MCAnalysis MCDisassembler MCParser Object all-targets
required_libraries = DebugInfo MC MCDisassembler MCParser Object all-targets

View File

@ -308,8 +308,6 @@ static void DisassembleInputMachO2(StringRef Filename,
MachO::mach_header Header = MachOOF->getHeader();
// FIXME: FoundFns isn't used anymore. Using symbols/LC_FUNCTION_STARTS to
// determine function locations will eventually go in MCObjectDisassembler.
// FIXME: Using the -cfg command line option, this code used to be able to
// annotate relocations with the referenced symbol's name, and if this was
// inside a __[cf]string section, the data it points to. This is now replaced

View File

@ -9,7 +9,7 @@
LEVEL := ../..
TOOLNAME := llvm-objdump
LINK_COMPONENTS := all-targets DebugInfo MC MCAnalysis MCParser MCDisassembler Object
LINK_COMPONENTS := all-targets DebugInfo MC MCParser MCDisassembler Object
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS := 1

View File

@ -20,11 +20,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCFunction.h"
#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/MC/MCAnalysis/MCModuleYAML.h"
#include "llvm/MC/MCAnalysis/MCObjectSymbolizer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
@ -32,7 +27,6 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectDisassembler.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCRelocationInfo.h"
@ -141,20 +135,6 @@ static cl::alias
PrivateHeadersShort("p", cl::desc("Alias for --private-headers"),
cl::aliasopt(PrivateHeaders));
// -symbolize: boolean switch asking the disassembler to symbolize operands.
static cl::opt<bool>
Symbolize("symbolize", cl::desc("When disassembling instructions, "
"try to symbolize operands."));
// -cfg: boolean switch requesting a CFG per function, written as graphviz.
static cl::opt<bool>
CFG("cfg", cl::desc("Create a CFG for every function found in the object"
" and write it to a graphviz file"));
// FIXME: Does it make sense to have a dedicated tool for yaml cfg output?
// -yaml-cfg=<file>: string option naming the YAML output file for the CFG;
// the code that consumes it treats an empty value as "not requested".
static cl::opt<std::string>
YAMLCFG("yaml-cfg",
cl::desc("Create a CFG and write it as a YAML MCModule."),
cl::value_desc("yaml output file"));
static StringRef ToolName;
bool llvm::error(std::error_code EC) {
@ -200,53 +180,6 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
return TheTarget;
}
// Write a graphviz file for the CFG inside an MCFunction.
// FIXME: Use GraphWriter
static void emitDOTFile(const char *FileName, const MCFunction &f,
MCInstPrinter *IP) {
// Start a new dot file.
std::error_code EC;
raw_fd_ostream Out(FileName, EC, sys::fs::F_Text);
if (EC) {
errs() << "llvm-objdump: warning: " << EC.message() << '\n';
return;
}
Out << "digraph \"" << f.getName() << "\" {\n";
Out << "graph [ rankdir = \"LR\" ];\n";
for (MCFunction::const_iterator i = f.begin(), e = f.end(); i != e; ++i) {
// Only print blocks that have predecessors.
bool hasPreds = (*i)->pred_begin() != (*i)->pred_end();
if (!hasPreds && i != f.begin())
continue;
Out << '"' << (*i)->getInsts()->getBeginAddr() << "\" [ label=\"<a>";
// Print instructions.
for (unsigned ii = 0, ie = (*i)->getInsts()->size(); ii != ie;
++ii) {
if (ii != 0) // Not the first line, start a new row.
Out << '|';
if (ii + 1 == ie) // Last line, add an end id.
Out << "<o>";
// Escape special chars and print the instruction in mnemonic form.
std::string Str;
raw_string_ostream OS(Str);
IP->printInst(&(*i)->getInsts()->at(ii).Inst, OS, "");
Out << DOT::EscapeString(OS.str());
}
Out << "\" shape=\"record\" ];\n";
// Add edges.
for (MCBasicBlock::succ_const_iterator si = (*i)->succ_begin(),
se = (*i)->succ_end(); si != se; ++si)
Out << (*i)->getInsts()->getBeginAddr() << ":o -> "
<< (*si)->getInsts()->getBeginAddr() << ":a\n";
}
Out << "}\n";
}
void llvm::DumpBytes(StringRef bytes) {
static const char hex_rep[] = "0123456789abcdef";
// FIXME: The real way to do this is to figure out the longest instruction
@ -335,19 +268,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
return;
}
if (Symbolize) {
std::unique_ptr<MCRelocationInfo> RelInfo(
TheTarget->createMCRelocationInfo(TripleName, Ctx));
if (RelInfo) {
std::unique_ptr<MCSymbolizer> Symzer(
MCObjectSymbolizer::createObjectSymbolizer(Ctx, std::move(RelInfo),
Obj));
if (Symzer)
DisAsm->setSymbolizer(std::move(Symzer));
}
}
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
@ -360,45 +280,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
return;
}
if (CFG || !YAMLCFG.empty()) {
std::unique_ptr<MCObjectDisassembler> OD(
new MCObjectDisassembler(*Obj, *DisAsm, *MIA));
std::unique_ptr<MCModule> Mod(OD->buildModule(/* withCFG */ true));
for (MCModule::const_atom_iterator AI = Mod->atom_begin(),
AE = Mod->atom_end();
AI != AE; ++AI) {
outs() << "Atom " << (*AI)->getName() << ": \n";
if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI)) {
for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
II != IE;
++II) {
IP->printInst(&II->Inst, outs(), "");
outs() << "\n";
}
}
}
if (CFG) {
for (MCModule::const_func_iterator FI = Mod->func_begin(),
FE = Mod->func_end();
FI != FE; ++FI) {
static int filenum = 0;
emitDOTFile((Twine((*FI)->getName()) + "_" +
utostr(filenum) + ".dot").str().c_str(),
**FI, IP.get());
++filenum;
}
}
if (!YAMLCFG.empty()) {
std::error_code EC;
raw_fd_ostream YAMLOut(YAMLCFG, EC, sys::fs::F_Text);
if (EC) {
errs() << ToolName << ": warning: " << EC.message() << '\n';
return;
}
mcmodule2yaml(YAMLOut, *Mod, *MII, *MRI);
}
}
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " :
"\t\t\t%08" PRIx64 ": ";

View File

@ -1,11 +1,9 @@
set(LLVM_LINK_COMPONENTS
MC
MCAnalysis
Support
)
add_llvm_unittest(MCTests
MCAtomTest.cpp
StringTableBuilderTest.cpp
YAMLTest.cpp
)

View File

@ -1,31 +0,0 @@
//===- llvm/unittest/MC/MCAtomTest.cpp - Instructions unit tests ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAnalysis/MCAtom.h"
#include "llvm/MC/MCAnalysis/MCModule.h"
#include "gtest/gtest.h"
namespace llvm {
namespace {
// Checks how the end address of a data atom evolves as bytes are appended.
// The expectations below pin the following sequence: the atom created with
// (0, 0) reports end address 0; it still reports 0 after the first addData,
// then 1 and 2 after the second and third, while getData() holds 3 bytes.
TEST(MCAtomTest, MCDataSize) {
MCModule M;
MCDataAtom *Atom = M.createDataAtom(0, 0);
EXPECT_EQ(uint64_t(0), Atom->getEndAddr());
// First byte: end address is expected to stay at 0.
Atom->addData(0);
EXPECT_EQ(uint64_t(0), Atom->getEndAddr());
// Each further byte is expected to advance the end address by one.
Atom->addData(1);
EXPECT_EQ(uint64_t(1), Atom->getEndAddr());
Atom->addData(2);
EXPECT_EQ(uint64_t(2), Atom->getEndAddr());
// Three addData calls in total, so three bytes of data.
EXPECT_EQ(size_t(3), Atom->getData().size());
}
} // end anonymous namespace
} // end namespace llvm

View File

@ -9,7 +9,7 @@
LEVEL = ../..
TESTNAME = MC
LINK_COMPONENTS := MCAnalysis
LINK_COMPONENTS := Object
include $(LEVEL)/Makefile.config
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest