MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/MC/MCObjectDisassembler.h"
|
|
|
|
#include "llvm/ADT/SetVector.h"
|
2013-08-21 15:28:51 +08:00
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
#include "llvm/ADT/StringExtras.h"
|
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ADT/Twine.h"
|
|
|
|
#include "llvm/MC/MCAtom.h"
|
|
|
|
#include "llvm/MC/MCDisassembler.h"
|
|
|
|
#include "llvm/MC/MCFunction.h"
|
|
|
|
#include "llvm/MC/MCInstrAnalysis.h"
|
|
|
|
#include "llvm/MC/MCModule.h"
|
2013-08-21 15:28:48 +08:00
|
|
|
#include "llvm/MC/MCObjectSymbolizer.h"
|
2013-08-21 15:28:44 +08:00
|
|
|
#include "llvm/Object/MachO.h"
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
#include "llvm/Object/ObjectFile.h"
|
2013-08-21 15:28:44 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/MachO.h"
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
#include "llvm/Support/MemoryObject.h"
|
|
|
|
#include "llvm/Support/StringRefMemoryObject.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <map>
|
2013-08-21 15:28:51 +08:00
|
|
|
#include <vector>
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace object;
|
|
|
|
|
|
|
|
/// Construct a disassembler over \p Obj.
///
/// Records \p Obj, \p Dis and \p MIA in the members of the same name and
/// initializes MOS to 0.
MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
                                           const MCDisassembler &Dis,
                                           const MCInstrAnalysis &MIA)
    : Obj(Obj),
      Dis(Dis),
      MIA(MIA),
      MOS(0) {
}
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
|
2013-08-21 15:28:29 +08:00
|
|
|
/// Search the object's symbols for one named "main" or "_main" and return its
/// address after passing it through getEffectiveLoadAddr().
///
/// \returns 0 when no such symbol with a usable address exists, or when
/// advancing the symbol iterator reports an error before one is found.
uint64_t MCObjectDisassembler::getEntrypoint() {
  error_code ec;
  for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
       SI != SE; SI.increment(ec)) {
    if (ec)
      break;

    // A symbol whose name cannot be read cannot be matched; move on.
    StringRef Name;
    if (SI->getName(Name))
      continue;
    if (!(Name == "main" || Name == "_main"))
      continue;

    // Only hand back an address that was actually produced: skip the symbol
    // when the query fails or yields the "unknown" sentinel, instead of
    // returning an uninitialized value.
    uint64_t Entrypoint = 0;
    if (SI->getAddress(Entrypoint) || Entrypoint == UnknownAddressOrSize)
      continue;

    return getEffectiveLoadAddr(Entrypoint);
  }
  return 0;
}
|
|
|
|
|
|
|
|
/// \returns the addresses of static initializer functions.  This
/// implementation reports none: the result is always an empty array.
ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
  const ArrayRef<uint64_t> NoFunctions;
  return NoFunctions;
}
|
|
|
|
|
|
|
|
/// \returns the addresses of static exit functions.  This implementation
/// reports none: the result is always an empty array.
ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
  const ArrayRef<uint64_t> NoFunctions;
  return NoFunctions;
}
|
|
|
|
|
2013-08-21 15:28:37 +08:00
|
|
|
/// Map an address taken from the object file to the address used by the
/// module being built.  This implementation is the identity mapping.
///
/// \param Addr address as found in the object file.
/// \returns \p Addr unchanged.
uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
  const uint64_t Effective = Addr;
  return Effective;
}
|
|
|
|
|
|
|
|
/// Counterpart of getEffectiveLoadAddr().  This implementation is the
/// identity mapping.
///
/// \param Addr address to translate.
/// \returns \p Addr unchanged.
uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
  const uint64_t Original = Addr;
  return Original;
}
|
|
|
|
|
2013-08-21 15:28:29 +08:00
|
|
|
/// Allocate a fresh MCModule and set its Entrypoint from getEntrypoint().
/// No atoms are created here.
///
/// \returns the newly allocated module.
MCModule *MCObjectDisassembler::buildEmptyModule() {
  MCModule *const Result = new MCModule;
  const uint64_t Entry = getEntrypoint();
  Result->Entrypoint = Entry;
  return Result;
}
|
|
|
|
|
|
|
|
/// Build a module for the object: start from buildEmptyModule(), populate it
/// with buildSectionAtoms(), and optionally run buildCFG() on it.
///
/// \param withCFG when true, buildCFG() is run after the atoms are created.
/// \returns the module produced by buildEmptyModule().
MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
  MCModule *const Result = buildEmptyModule();
  buildSectionAtoms(Result);

  if (!withCFG)
    return Result;

  buildCFG(Result);
  return Result;
}
|
|
|
|
|
|
|
|
/// Create atoms in \p Module for every text or data section of the object.
///
/// Sections that are neither text nor data, whose address or size is unknown
/// (or cannot be queried), or whose contents do not match their size are
/// skipped.  A data section becomes a single data atom.  A text section is
/// decoded instruction by instruction: runs of decodable instructions go into
/// text atoms and runs of undecodable bytes go into data atoms.
///
/// \param Module module that receives the new atoms.
void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
  error_code ec;
  for (section_iterator SI = Obj.begin_sections(), SE = Obj.end_sections();
       SI != SE; SI.increment(ec)) {
    if (ec)
      break;

    // Initialize the out-parameters and honour the error codes so that a
    // failed query never leaves us reading an indeterminate value.
    bool isText = false;
    bool isData = false;
    if (SI->isText(isText) || SI->isData(isData))
      continue;
    if (!isData && !isText)
      continue;

    uint64_t StartAddr = UnknownAddressOrSize;
    uint64_t SecSize = UnknownAddressOrSize;
    if (SI->getAddress(StartAddr) || SI->getSize(SecSize))
      continue;
    if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
      continue;
    StartAddr = getEffectiveLoadAddr(StartAddr);

    // If this query fails Contents stays empty and the size check below
    // rejects the section.
    StringRef Contents;
    SI->getContents(Contents);

    // We don't care about things like non-file-backed sections yet.
    if (Contents.size() != SecSize || !SecSize)
      continue;
    const uint64_t EndAddr = StartAddr + SecSize - 1;

    StringRefMemoryObject memoryObject(Contents, StartAddr);

    // The name is only a label on the atom; an empty name is acceptable.
    StringRef SecName;
    SI->getName(SecName);

    if (!isText) {
      MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
      Data->setName(SecName);
      for (uint64_t Index = 0; Index < SecSize; ++Index)
        Data->addData(Contents[Index]);
      continue;
    }

    // At most one of these is non-null at a time: the atom the current run of
    // instructions / undecodable bytes is being appended to.
    MCTextAtom *Text = 0;
    MCDataAtom *InvalidData = 0;

    uint64_t InstSize = 0;
    for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
      const uint64_t CurAddr = StartAddr + Index;
      MCInst Inst;
      InstSize = 0;
      // A "successful" decode that consumed no bytes is treated as a failure
      // so the loop is guaranteed to make progress.
      if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
                             nulls()) &&
          InstSize != 0) {
        if (!Text) {
          Text = Module->createTextAtom(CurAddr, CurAddr);
          Text->setName(SecName);
        }
        Text->addInst(Inst, InstSize);
        InvalidData = 0;
        continue;
      }

      // Undecodable bytes: consume at least one byte and never more than what
      // is left in the section.
      if (InstSize == 0)
        InstSize = 1;
      const uint64_t Remaining = SecSize - Index;
      if (InstSize > Remaining)
        InstSize = Remaining;

      if (!InvalidData) {
        Text = 0;
        // Cover only the bytes skipped so far rather than the rest of the
        // section, so the atom cannot overlap a text atom created later.
        // NOTE(review): this relies on addData() growing the atom the way
        // addInst() grows a text atom created as [CurAddr, CurAddr] above;
        // confirm against MCAtom.
        InvalidData = Module->createDataAtom(CurAddr, CurAddr + InstSize - 1);
      }
      // Keep every skipped byte, not just the first one.
      for (uint64_t I = 0; I != InstSize; ++I)
        InvalidData->addData(Contents[Index + I]);
    }
  }
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
struct BBInfo;
|
2013-08-21 15:28:51 +08:00
|
|
|
typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy;
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
|
|
|
|
struct BBInfo {
|
|
|
|
MCTextAtom *Atom;
|
|
|
|
MCBasicBlock *BB;
|
|
|
|
BBInfoSetTy Succs;
|
|
|
|
BBInfoSetTy Preds;
|
|
|
|
|
2013-08-21 15:28:32 +08:00
|
|
|
BBInfo() : Atom(0), BB(0) {}
|
|
|
|
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
void addSucc(BBInfo &Succ) {
|
|
|
|
Succs.insert(&Succ);
|
|
|
|
Succ.Preds.insert(this);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
void MCObjectDisassembler::buildCFG(MCModule *Module) {
|
|
|
|
typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
|
|
|
|
BBInfoByAddrTy BBInfos;
|
2013-08-21 15:28:51 +08:00
|
|
|
typedef std::vector<uint64_t> AddressSetTy;
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
AddressSetTy Splits;
|
|
|
|
AddressSetTy Calls;
|
|
|
|
|
2013-08-21 15:28:29 +08:00
|
|
|
error_code ec;
|
|
|
|
for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
|
|
|
|
SI != SE; SI.increment(ec)) {
|
|
|
|
if (ec)
|
|
|
|
break;
|
|
|
|
SymbolRef::Type SymType;
|
|
|
|
SI->getType(SymType);
|
|
|
|
if (SymType == SymbolRef::ST_Function) {
|
|
|
|
uint64_t SymAddr;
|
|
|
|
SI->getAddress(SymAddr);
|
2013-08-21 15:28:37 +08:00
|
|
|
SymAddr = getEffectiveLoadAddr(SymAddr);
|
2013-08-21 15:28:51 +08:00
|
|
|
Calls.push_back(SymAddr);
|
|
|
|
Splits.push_back(SymAddr);
|
2013-08-21 15:28:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
assert(Module->func_begin() == Module->func_end()
|
|
|
|
&& "Module already has a CFG!");
|
|
|
|
|
|
|
|
// First, determine the basic block boundaries and call targets.
|
|
|
|
for (MCModule::atom_iterator AI = Module->atom_begin(),
|
|
|
|
AE = Module->atom_end();
|
|
|
|
AI != AE; ++AI) {
|
|
|
|
MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
|
|
|
|
if (!TA) continue;
|
2013-08-21 15:28:51 +08:00
|
|
|
Calls.push_back(TA->getBeginAddr());
|
2013-06-20 04:18:59 +08:00
|
|
|
BBInfos[TA->getBeginAddr()].Atom = TA;
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
|
|
|
|
II != IE; ++II) {
|
|
|
|
if (MIA.isTerminator(II->Inst))
|
2013-08-21 15:28:51 +08:00
|
|
|
Splits.push_back(II->Address + II->Size);
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
uint64_t Target;
|
|
|
|
if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
|
|
|
|
if (MIA.isCall(II->Inst))
|
2013-08-21 15:28:51 +08:00
|
|
|
Calls.push_back(Target);
|
|
|
|
Splits.push_back(Target);
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-08-21 15:28:51 +08:00
|
|
|
std::sort(Splits.begin(), Splits.end());
|
|
|
|
Splits.erase(std::unique(Splits.begin(), Splits.end()), Splits.end());
|
|
|
|
|
|
|
|
std::sort(Calls.begin(), Calls.end());
|
|
|
|
Calls.erase(std::unique(Calls.begin(), Calls.end()), Calls.end());
|
|
|
|
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
// Split text atoms into basic block atoms.
|
|
|
|
for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
|
|
|
|
SI != SE; ++SI) {
|
|
|
|
MCAtom *A = Module->findAtomContaining(*SI);
|
|
|
|
if (!A) continue;
|
|
|
|
MCTextAtom *TA = cast<MCTextAtom>(A);
|
|
|
|
if (TA->getBeginAddr() == *SI)
|
|
|
|
continue;
|
|
|
|
MCTextAtom *NewAtom = TA->split(*SI);
|
|
|
|
BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
|
|
|
|
StringRef BBName = TA->getName();
|
|
|
|
BBName = BBName.substr(0, BBName.find_last_of(':'));
|
|
|
|
NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute succs/preds.
|
|
|
|
for (MCModule::atom_iterator AI = Module->atom_begin(),
|
|
|
|
AE = Module->atom_end();
|
|
|
|
AI != AE; ++AI) {
|
|
|
|
MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
|
|
|
|
if (!TA) continue;
|
|
|
|
BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
|
|
|
|
const MCDecodedInst &LI = TA->back();
|
|
|
|
if (MIA.isBranch(LI.Inst)) {
|
|
|
|
uint64_t Target;
|
|
|
|
if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
|
|
|
|
CurBB.addSucc(BBInfos[Target]);
|
|
|
|
if (MIA.isConditionalBranch(LI.Inst))
|
|
|
|
CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
|
|
|
|
} else if (!MIA.isTerminator(LI.Inst))
|
|
|
|
CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Create functions and basic blocks.
|
|
|
|
for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
|
|
|
|
CI != CE; ++CI) {
|
|
|
|
BBInfo &BBI = BBInfos[*CI];
|
|
|
|
if (!BBI.Atom) continue;
|
|
|
|
|
|
|
|
MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
|
|
|
|
|
|
|
|
// Create MCBBs.
|
|
|
|
SmallSetVector<BBInfo*, 16> Worklist;
|
|
|
|
Worklist.insert(&BBI);
|
2013-08-21 15:28:32 +08:00
|
|
|
for (size_t wi = 0; wi < Worklist.size(); ++wi) {
|
|
|
|
BBInfo *BBI = Worklist[wi];
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
if (!BBI->Atom)
|
|
|
|
continue;
|
|
|
|
BBI->BB = &MCFN.createBlock(*BBI->Atom);
|
|
|
|
// Add all predecessors and successors to the worklist.
|
|
|
|
for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
|
|
|
|
SI != SE; ++SI)
|
|
|
|
Worklist.insert(*SI);
|
|
|
|
for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
|
|
|
|
PI != PE; ++PI)
|
|
|
|
Worklist.insert(*PI);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set preds/succs.
|
2013-08-21 15:28:32 +08:00
|
|
|
for (size_t wi = 0; wi < Worklist.size(); ++wi) {
|
|
|
|
BBInfo *BBI = Worklist[wi];
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
MCBasicBlock *MCBB = BBI->BB;
|
|
|
|
if (!MCBB)
|
|
|
|
continue;
|
|
|
|
for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
|
2013-08-21 15:28:32 +08:00
|
|
|
SI != SE; ++SI)
|
|
|
|
if ((*SI)->BB)
|
|
|
|
MCBB->addSuccessor((*SI)->BB);
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
|
2013-08-21 15:28:32 +08:00
|
|
|
PI != PE; ++PI)
|
|
|
|
if ((*PI)->BB)
|
|
|
|
MCBB->addPredecessor((*PI)->BB);
|
MC: Disassembled CFG reconstruction.
This patch builds on some existing code to do CFG reconstruction from
a disassembled binary:
- MCModule represents the binary, and has a list of MCAtoms.
- MCAtom represents either disassembled instructions (MCTextAtom), or
contiguous data (MCDataAtom), and covers a specific range of addresses.
- MCBasicBlock and MCFunction form the reconstructed CFG. An MCBB is
backed by an MCTextAtom, and has the usual successors/predecessors.
- MCObjectDisassembler creates a module from an ObjectFile using a
disassembler. It first builds an atom for each section. It can also
construct the CFG, and this splits the text atoms into basic blocks.
MCModule and MCAtom were only sketched out; MCFunction and MCBB were
implemented under the experimental "-cfg" llvm-objdump -macho option.
This cleans them up for further use; llvm-objdump -d -cfg now generates
graphviz files for each function found in the binary.
In the future, MCObjectDisassembler may be the right place to do
"intelligent" disassembly: for example, handling constant islands is just
a matter of splitting the atom, using information that may be available
in the ObjectFile. Also, better initial atom formation than just using
sections is possible using symbols (and things like Mach-O's
function_starts load command).
This brings two minor regressions in llvm-objdump -macho -cfg:
- The printing of a relocation's referenced symbol.
- An annotation on loop BBs, i.e., which are their own successor.
Relocation printing is replaced by the MCSymbolizer; the basic CFG
annotation will be superseded by more related functionality.
llvm-svn: 182628
2013-05-24 09:07:04 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-08-21 15:28:44 +08:00
|
|
|
|
|
|
|
// MachO MCObjectDisassembler implementation.
|
|
|
|
|
|
|
|
/// Build a Mach-O specific object disassembler for \p MOOF and capture the
/// contents of its "__mod_init_func" and "__mod_exit_func" sections, if
/// present, into ModInitContents and ModExitContents.
MCMachOObjectDisassembler::MCMachOObjectDisassembler(
    const MachOObjectFile &MOOF, const MCDisassembler &Dis,
    const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
    uint64_t HeaderLoadAddress)
    : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
      VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {

  error_code Err;
  for (section_iterator Sec = MOOF.begin_sections(),
                        SecEnd = MOOF.end_sections();
       Sec != SecEnd; Sec.increment(Err)) {
    // Stop scanning as soon as advancing the iterator reported an error.
    if (Err)
      break;

    StringRef SecName;
    Sec->getName(SecName);

    // FIXME: We should use the S_ section type instead of the name.
    if (SecName == "__mod_init_func") {
      DEBUG(dbgs() << "Found __mod_init_func section!\n");
      Sec->getContents(ModInitContents);
      continue;
    }

    if (SecName == "__mod_exit_func") {
      DEBUG(dbgs() << "Found __mod_exit_func section!\n");
      Sec->getContents(ModExitContents);
    }
  }
}
|
|
|
|
|
|
|
|
// FIXME: Only do the translations for addresses actually inside the object.
|
|
|
|
uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
|
|
|
|
return Addr + VMAddrSlide;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
|
|
|
|
return EffectiveAddr - VMAddrSlide;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t MCMachOObjectDisassembler::getEntrypoint() {
|
|
|
|
uint64_t EntryFileOffset = 0;
|
|
|
|
|
|
|
|
// Look for LC_MAIN.
|
|
|
|
{
|
|
|
|
uint32_t LoadCommandCount = MOOF.getHeader().NumLoadCommands;
|
|
|
|
MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
|
|
|
|
for (unsigned I = 0;; ++I) {
|
|
|
|
if (Load.C.Type == MachO::LoadCommandMain) {
|
|
|
|
EntryFileOffset =
|
|
|
|
((const MachO::entry_point_command *)Load.Ptr)->entryoff;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (I == LoadCommandCount - 1)
|
|
|
|
break;
|
|
|
|
else
|
|
|
|
Load = MOOF.getNextLoadCommandInfo(Load);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we didn't find anything, default to the common implementation.
|
|
|
|
// FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
|
|
|
|
if (EntryFileOffset)
|
|
|
|
return MCObjectDisassembler::getEntrypoint();
|
|
|
|
|
|
|
|
return EntryFileOffset + HeaderLoadAddress;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return a view of ModInitContents interpreted as an array of 8-byte
/// entries. Trailing bytes that do not form a whole entry are not included.
ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
  // FIXME: We only handle 64bit mach-o
  assert(MOOF.is64Bit());

  const size_t BytesPerEntry = 8;
  const uint64_t *FirstEntry =
      reinterpret_cast<const uint64_t *>(ModInitContents.data());
  const size_t NumEntries = ModInitContents.size() / BytesPerEntry;
  return ArrayRef<uint64_t>(FirstEntry, NumEntries);
}
|
|
|
|
|
|
|
|
/// Return a view of ModExitContents interpreted as an array of 8-byte
/// entries. Trailing bytes that do not form a whole entry are not included.
ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
  // FIXME: We only handle 64bit mach-o
  assert(MOOF.is64Bit());

  const size_t BytesPerEntry = 8;
  const uint64_t *FirstEntry =
      reinterpret_cast<const uint64_t *>(ModExitContents.data());
  const size_t NumEntries = ModExitContents.size() / BytesPerEntry;
  return ArrayRef<uint64_t>(FirstEntry, NumEntries);
}
|