forked from OSchip/llvm-project
Add branch count information to binary CFG
Summary: Changes DataReader to organize branch perf data by function name and sets up the plumbing needed to bring this data to BinaryFunction::buildCFG(). To do this, we extend BinaryContext with a const reference to DataReader. This patch also adds the "-dump-functions" flag, which forces llvm-flo to dump the current state of each BinaryFunction once it has been disassembled and its CFG has been built, allowing us to test whether the builder is sane with LLVM LIT tests. (cherry picked from FBD2534675)
This commit is contained in:
parent
d30423f872
commit
4c1da22ae9
|
@ -35,19 +35,24 @@ void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ,
|
|||
uint64_t Count,
|
||||
uint64_t MispredictedCount) {
|
||||
Successors.push_back(Succ);
|
||||
BranchInfo.push_back({Count, MispredictedCount});
|
||||
Succ->Predecessors.push_back(this);
|
||||
|
||||
// TODO: update weights.
|
||||
}
|
||||
|
||||
/// Remove \p Succ from this block's successor list, drop the BranchInfo entry
/// stored at the same position, and unregister this block as a predecessor of
/// \p Succ.
///
/// Successors and BranchInfo are kept as parallel sequences (addSuccessor
/// appends to both), so the two are walked in lock-step to locate the profile
/// entry that belongs to \p Succ.
///
/// \p Succ is expected to be a current successor; this is asserted. When
/// assertions are compiled out and \p Succ is not found, the function returns
/// without modifying anything instead of erasing an end iterator.
void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
  // Locate the successor and its matching BranchInfo entry before mutating
  // any state, so a failed lookup leaves both blocks untouched.
  auto I = succ_begin();
  auto BI = BranchInfo.begin();
  while (I != succ_end() && *I != Succ) {
    assert(BI != BranchInfo.end() && "missing BranchInfo entry");
    ++I;
    ++BI;
  }
  assert(I != succ_end() && "no such successor!");
  assert(BI != BranchInfo.end() && "missing BranchInfo entry");
  if (I == succ_end())
    return;

  Succ->removePredecessor(this);
  Successors.erase(I);
  BranchInfo.erase(BI);
}
|
||||
|
||||
void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) {
|
||||
|
|
|
@ -35,6 +35,8 @@ namespace llvm {
|
|||
|
||||
namespace flo {
|
||||
|
||||
class DataReader;
|
||||
|
||||
/// Everything that's needed to process binaries lives here.
|
||||
class BinaryContext {
|
||||
|
||||
|
@ -77,6 +79,8 @@ public:
|
|||
|
||||
MCAsmBackend *MAB;
|
||||
|
||||
const DataReader &DR;
|
||||
|
||||
BinaryContext(std::unique_ptr<MCContext> Ctx,
|
||||
std::unique_ptr<Triple> TheTriple,
|
||||
const Target *TheTarget,
|
||||
|
@ -89,7 +93,8 @@ public:
|
|||
std::unique_ptr<const MCInstrAnalysis> MIA,
|
||||
std::unique_ptr<const MCRegisterInfo> MRI,
|
||||
std::unique_ptr<MCDisassembler> DisAsm,
|
||||
MCAsmBackend *MAB) :
|
||||
MCAsmBackend *MAB,
|
||||
const DataReader &DR) :
|
||||
Ctx(std::move(Ctx)),
|
||||
TheTriple(std::move(TheTriple)),
|
||||
TheTarget(TheTarget),
|
||||
|
@ -102,7 +107,8 @@ public:
|
|||
MIA(std::move(MIA)),
|
||||
MRI(std::move(MRI)),
|
||||
DisAsm(std::move(DisAsm)),
|
||||
MAB(MAB) {}
|
||||
MAB(MAB),
|
||||
DR(DR) {}
|
||||
|
||||
~BinaryContext() {}
|
||||
};
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
#include "BinaryBasicBlock.h"
|
||||
#include "BinaryFunction.h"
|
||||
#include "DataReader.h"
|
||||
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "flo"
|
||||
|
@ -61,8 +62,10 @@ void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
|
|||
<< "\n Orc Section : " << getCodeSectionName()
|
||||
<< "\n IsSimple : " << IsSimple
|
||||
<< "\n BB count : " << BasicBlocks.size()
|
||||
<< "\n Image : 0x" << Twine::utohexstr(ImageAddress)
|
||||
<< "\n}\n";
|
||||
<< "\n Image : 0x" << Twine::utohexstr(ImageAddress);
|
||||
if (ExecutionCount != COUNT_NO_PROFILE)
|
||||
OS << "\n Exec Count : " << ExecutionCount;
|
||||
OS << "\n}\n";
|
||||
|
||||
if (!PrintInstructions || !BC.InstPrinter)
|
||||
return;
|
||||
|
@ -122,10 +125,14 @@ void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
|
|||
|
||||
if (!BB.Successors.empty()) {
|
||||
OS << " Successors: ";
|
||||
auto BI = BB.BranchInfo.begin();
|
||||
auto Sep = "";
|
||||
for (auto Succ : BB.Successors) {
|
||||
OS << Sep << Succ->getName();
|
||||
assert(BI != BB.BranchInfo.end() && "missing BranchInfo entry");
|
||||
OS << Sep << Succ->getName() << " (mispreds: " << BI->MispredictedCount
|
||||
<< ", count: " << BI->Count << ")";
|
||||
Sep = ", ";
|
||||
++BI;
|
||||
}
|
||||
OS << '\n';
|
||||
}
|
||||
|
@ -307,6 +314,13 @@ bool BinaryFunction::buildCFG() {
|
|||
|
||||
auto &MIA = BC.MIA;
|
||||
|
||||
auto BranchDataOrErr = BC.DR.getFuncBranchData(getName());
|
||||
if (std::error_code EC = BranchDataOrErr.getError()) {
|
||||
DEBUG(dbgs() << "no branch data found for \"" << getName() << "\"\n");
|
||||
} else {
|
||||
ExecutionCount = BC.DR.countBranchesTo(getName());
|
||||
}
|
||||
|
||||
if (!isSimple())
|
||||
return false;
|
||||
|
||||
|
@ -378,9 +392,18 @@ bool BinaryFunction::buildCFG() {
|
|||
BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
|
||||
assert(ToBB && "cannot find BB containing TO branch");
|
||||
|
||||
// TODO: add weights here.
|
||||
//
|
||||
FromBB->addSuccessor(ToBB);
|
||||
if (std::error_code EC = BranchDataOrErr.getError()) {
|
||||
FromBB->addSuccessor(ToBB);
|
||||
} else {
|
||||
const FuncBranchData &BranchData = BranchDataOrErr.get();
|
||||
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
|
||||
if (std::error_code EC = BranchInfoOrErr.getError()) {
|
||||
FromBB->addSuccessor(ToBB);
|
||||
} else {
|
||||
const BranchInfo &BInfo = BranchInfoOrErr.get();
|
||||
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add fall-through branches.
|
||||
|
|
|
@ -18,6 +18,25 @@
|
|||
namespace llvm {
|
||||
namespace flo {
|
||||
|
||||
/// Find the record for the branch going from offset \p From to offset \p To.
///
/// Only the two offsets are compared; the names stored in the record are not
/// consulted. The first matching record in Data is returned by reference.
///
/// \returns the matching BranchInfo, or errc::invalid_argument when Data holds
/// no record with that pair of offsets.
ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
                                                      uint64_t To) const {
  for (auto Rec = Data.begin(), End = Data.end(); Rec != End; ++Rec) {
    if (Rec->From.Offset != From)
      continue;
    if (Rec->To.Offset != To)
      continue;
    return *Rec;
  }
  return make_error_code(llvm::errc::invalid_argument);
}
|
||||
|
||||
uint64_t
|
||||
FuncBranchData::countBranchesTo(StringRef FuncName) const {
|
||||
uint64_t TotalCount = 0;
|
||||
for (const auto &I : Data) {
|
||||
if (I.To.Offset == 0 && I.To.Name == FuncName)
|
||||
TotalCount += I.Branches;
|
||||
}
|
||||
return TotalCount;
|
||||
}
|
||||
|
||||
ErrorOr<std::unique_ptr<DataReader>>
|
||||
DataReader::readPerfData(StringRef Path, raw_ostream &Diag) {
|
||||
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
||||
|
@ -148,16 +167,45 @@ std::error_code DataReader::parse() {
|
|||
return EC;
|
||||
Col = 0;
|
||||
Line += 1;
|
||||
|
||||
BranchInfo BI = Res.get();
|
||||
ParsedData.emplace_back(std::move(BI));
|
||||
StringRef Name = BI.From.Name;
|
||||
auto I = FuncsMap.find(Name);
|
||||
if (I == FuncsMap.end()) {
|
||||
FuncBranchData::ContainerTy Cont;
|
||||
Cont.emplace_back(std::move(BI));
|
||||
FuncsMap.insert(
|
||||
std::make_pair(Name, FuncBranchData(Name, std::move(Cont))));
|
||||
continue;
|
||||
}
|
||||
I->getValue().Data.emplace_back(std::move(BI));
|
||||
}
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
void DataReader::dump() {
|
||||
for (auto &BI : ParsedData) {
|
||||
Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
|
||||
<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
|
||||
/// Look up the branch records stored in FuncsMap under \p FuncName.
///
/// \returns a reference to the stored FuncBranchData, or
/// errc::invalid_argument when FuncsMap has no entry for \p FuncName.
ErrorOr<const FuncBranchData &>
DataReader::getFuncBranchData(StringRef FuncName) const {
  const auto Entry = FuncsMap.find(FuncName);
  if (Entry != FuncsMap.end())
    return Entry->getValue();
  return make_error_code(llvm::errc::invalid_argument);
}
|
||||
|
||||
/// Total the branches targeting \p FuncName across all functions in FuncsMap.
///
/// Each entry's FuncBranchData::countBranchesTo result is added to the
/// running total, so this visits every parsed function on each call.
uint64_t DataReader::countBranchesTo(StringRef FuncName) const {
  uint64_t Sum = 0;
  for (auto Entry = FuncsMap.begin(), End = FuncsMap.end(); Entry != End;
       ++Entry) {
    const FuncBranchData &PerFunc = Entry->getValue();
    Sum += PerFunc.countBranchesTo(FuncName);
  }
  return Sum;
}
|
||||
|
||||
void DataReader::dump() const {
|
||||
for (const auto &Func : FuncsMap) {
|
||||
for (const auto &BI : Func.getValue().Data) {
|
||||
Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
|
||||
<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,12 +46,28 @@ struct BranchInfo {
|
|||
Branches(Branches) {}
|
||||
};
|
||||
|
||||
class FuncBranchData {
|
||||
public:
|
||||
typedef std::vector<BranchInfo> ContainerTy;
|
||||
|
||||
StringRef Name;
|
||||
ContainerTy Data;
|
||||
|
||||
FuncBranchData(StringRef Name, ContainerTy Data)
|
||||
: Name(Name), Data(std::move(Data)) {}
|
||||
|
||||
ErrorOr<const BranchInfo &> getBranch(uint64_t From, uint64_t To) const;
|
||||
uint64_t countBranchesTo(StringRef FuncName) const;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// DataReader Class
|
||||
///
|
||||
class DataReader {
|
||||
public:
|
||||
explicit DataReader(raw_ostream &Diag) : Diag(Diag) {}
|
||||
|
||||
DataReader(std::unique_ptr<MemoryBuffer> MemBuf, raw_ostream &Diag)
|
||||
: FileBuf(std::move(MemBuf)), Diag(Diag), ParsingBuf(FileBuf->getBuffer()),
|
||||
Line(0), Col(0) {}
|
||||
|
@ -77,8 +93,11 @@ public:
|
|||
/// offset 12, with 4 mispredictions and 221 branches
|
||||
std::error_code parse();
|
||||
|
||||
ErrorOr<const FuncBranchData &> getFuncBranchData(StringRef FuncName) const;
|
||||
uint64_t countBranchesTo(StringRef FuncName) const;
|
||||
|
||||
/// Dumps the entire data structures parsed. Used for debugging.
|
||||
void dump();
|
||||
void dump() const;
|
||||
|
||||
private:
|
||||
|
||||
|
@ -90,15 +109,13 @@ private:
|
|||
ErrorOr<BranchInfo> parseBranchInfo();
|
||||
bool hasData();
|
||||
|
||||
// Owns reader data structures
|
||||
BumpPtrAllocator Alloc;
|
||||
// An in-memory copy of the input data file - owns strings used in reader
|
||||
std::unique_ptr<MemoryBuffer> FileBuf;
|
||||
raw_ostream &Diag;
|
||||
StringRef ParsingBuf;
|
||||
unsigned Line;
|
||||
unsigned Col;
|
||||
std::vector<BranchInfo> ParsedData;
|
||||
StringMap<FuncBranchData> FuncsMap;
|
||||
static const char FieldSeparator = ' ';
|
||||
};
|
||||
|
||||
|
|
|
@ -87,6 +87,10 @@ static cl::opt<bool>
|
|||
DumpData("dump-data", cl::desc("dump parsed flo data (debugging)"),
|
||||
cl::Hidden);
|
||||
|
||||
// Hidden debugging switch "-dump-functions". When set, OptimizeFile prints
// each function (with instructions) to errs() after its CFG has been built
// and returns before the optimization passes run.
static cl::opt<bool>
|
||||
DumpFunctions("dump-functions", cl::desc("dump parsed functions (debugging)"),
|
||||
cl::Hidden);
|
||||
|
||||
static StringRef ToolName;
|
||||
|
||||
static void report_error(StringRef Message, std::error_code EC) {
|
||||
|
@ -158,7 +162,7 @@ public:
|
|||
/// triple \p TripleName.
|
||||
static std::unique_ptr<BinaryContext> CreateBinaryContext(
|
||||
std::string ArchName,
|
||||
std::string TripleName) {
|
||||
std::string TripleName, const DataReader &DR) {
|
||||
|
||||
std::string Error;
|
||||
|
||||
|
@ -255,17 +259,18 @@ static std::unique_ptr<BinaryContext> CreateBinaryContext(
|
|||
std::move(MIA),
|
||||
std::move(MRI),
|
||||
std::move(DisAsm),
|
||||
MAB);
|
||||
MAB,
|
||||
DR);
|
||||
|
||||
return BC;
|
||||
}
|
||||
|
||||
static void OptimizeFile(ELFObjectFileBase *File) {
|
||||
static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
|
||||
|
||||
// FIXME: there should be some way to extract arch and triple information
|
||||
// from the file.
|
||||
std::unique_ptr<BinaryContext> BC =
|
||||
std::move(CreateBinaryContext("x86-64", "x86_64-unknown-linux"));
|
||||
std::move(CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR));
|
||||
if (!BC) {
|
||||
errs() << "failed to create a binary context\n";
|
||||
return;
|
||||
|
@ -421,8 +426,12 @@ static void OptimizeFile(ELFObjectFileBase *File) {
|
|||
if (!Function.buildCFG())
|
||||
continue;
|
||||
|
||||
if (DumpFunctions)
|
||||
Function.print(errs(), true);
|
||||
} // Iterate over all functions
|
||||
|
||||
if (DumpFunctions)
|
||||
return;
|
||||
|
||||
// Run optimization passes.
|
||||
//
|
||||
|
@ -693,18 +702,18 @@ int main(int argc, char **argv) {
|
|||
if (!sys::fs::exists(InputFilename))
|
||||
report_error(InputFilename, errc::no_such_file_or_directory);
|
||||
|
||||
std::unique_ptr<flo::DataReader> DR(new DataReader(errs()));
|
||||
if (!InputDataFilename.empty()) {
|
||||
if (!sys::fs::exists(InputDataFilename))
|
||||
report_error(InputDataFilename, errc::no_such_file_or_directory);
|
||||
|
||||
// Attempt to read input flo data
|
||||
ErrorOr<std::unique_ptr<flo::DataReader>> ReaderOrErr =
|
||||
flo::DataReader::readPerfData(InputDataFilename, errs());
|
||||
auto ReaderOrErr = flo::DataReader::readPerfData(InputDataFilename, errs());
|
||||
if (std::error_code EC = ReaderOrErr.getError())
|
||||
report_error(InputDataFilename, EC);
|
||||
flo::DataReader &DR = *ReaderOrErr.get().get();
|
||||
DR.reset(ReaderOrErr.get().release());
|
||||
if (DumpData) {
|
||||
DR.dump();
|
||||
DR->dump();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
@ -716,7 +725,7 @@ int main(int argc, char **argv) {
|
|||
Binary &Binary = *BinaryOrErr.get().getBinary();
|
||||
|
||||
if (ELFObjectFileBase *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
|
||||
OptimizeFile(e);
|
||||
OptimizeFile(e, *DR.get());
|
||||
} else {
|
||||
report_error(InputFilename, object_error::invalid_file_type);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue