Add branch count information to binary CFG

Summary:
Changes DataReader to organize branch perf data per function name and
plumbs this data through to BinaryFunction::buildCFG(). To do this,
we extend BinaryContext with a const reference to DataReader. This patch also
adds the "-dump-functions" flag, which makes llvm-flo dump the state of each
BinaryFunction once it is disassembled and its CFG is built, allowing us to
check that the CFG builder is sane with LLVM LIT tests.

(cherry picked from FBD2534675)
This commit is contained in:
Rafael Auler 2015-10-12 12:30:47 -07:00 committed by Maksim Panchenko
parent d30423f872
commit 4c1da22ae9
6 changed files with 139 additions and 31 deletions

View File

@ -35,19 +35,24 @@ void BinaryBasicBlock::addSuccessor(BinaryBasicBlock *Succ,
uint64_t Count,
uint64_t MispredictedCount) {
Successors.push_back(Succ);
BranchInfo.push_back({Count, MispredictedCount});
Succ->Predecessors.push_back(this);
// TODO: update weights.
}
/// Unlinks \p Succ from this block: removes this block from \p Succ's
/// predecessors, then erases \p Succ from Successors together with the
/// BranchInfo entry found at the same position.
///
/// Asserts that \p Succ is actually present in Successors.
void BinaryBasicBlock::removeSuccessor(BinaryBasicBlock *Succ) {
Succ->removePredecessor(this);
// NOTE(review): two declarations of I follow back to back; this looks like
// the pre-change and post-change lines of a diff shown together. Confirm
// which one is current.
auto I = std::find(succ_begin(), succ_end(), Succ);
auto I = succ_begin();
auto BI = BranchInfo.begin();
// Advance I and BI in lockstep so that, when I stops at Succ, BI refers to
// the BranchInfo entry at the same position.
for (; I != succ_end(); ++I) {
assert(BI != BranchInfo.end() && "missing BranchInfo entry");
if (*I == Succ)
break;
++BI;
}
assert(I != succ_end() && "no such successor!");
Successors.erase(I);
// TODO: update weights.
BranchInfo.erase(BI);
}
void BinaryBasicBlock::addPredecessor(BinaryBasicBlock *Pred) {

View File

@ -35,6 +35,8 @@ namespace llvm {
namespace flo {
class DataReader;
/// Everything that's needed to process binaries lives here.
class BinaryContext {
@ -77,6 +79,8 @@ public:
MCAsmBackend *MAB;
const DataReader &DR;
BinaryContext(std::unique_ptr<MCContext> Ctx,
std::unique_ptr<Triple> TheTriple,
const Target *TheTarget,
@ -89,7 +93,8 @@ public:
std::unique_ptr<const MCInstrAnalysis> MIA,
std::unique_ptr<const MCRegisterInfo> MRI,
std::unique_ptr<MCDisassembler> DisAsm,
MCAsmBackend *MAB) :
MCAsmBackend *MAB,
const DataReader &DR) :
Ctx(std::move(Ctx)),
TheTriple(std::move(TheTriple)),
TheTarget(TheTarget),
@ -102,7 +107,8 @@ public:
MIA(std::move(MIA)),
MRI(std::move(MRI)),
DisAsm(std::move(DisAsm)),
MAB(MAB) {}
MAB(MAB),
DR(DR) {}
~BinaryContext() {}
};

View File

@ -24,6 +24,7 @@
#include "BinaryBasicBlock.h"
#include "BinaryFunction.h"
#include "DataReader.h"
#undef DEBUG_TYPE
#define DEBUG_TYPE "flo"
@ -61,8 +62,10 @@ void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
<< "\n Orc Section : " << getCodeSectionName()
<< "\n IsSimple : " << IsSimple
<< "\n BB count : " << BasicBlocks.size()
<< "\n Image : 0x" << Twine::utohexstr(ImageAddress)
<< "\n}\n";
<< "\n Image : 0x" << Twine::utohexstr(ImageAddress);
if (ExecutionCount != COUNT_NO_PROFILE)
OS << "\n Exec Count : " << ExecutionCount;
OS << "\n}\n";
if (!PrintInstructions || !BC.InstPrinter)
return;
@ -122,10 +125,14 @@ void BinaryFunction::print(raw_ostream &OS, bool PrintInstructions) const {
if (!BB.Successors.empty()) {
OS << " Successors: ";
auto BI = BB.BranchInfo.begin();
auto Sep = "";
for (auto Succ : BB.Successors) {
OS << Sep << Succ->getName();
assert(BI != BB.BranchInfo.end() && "missing BranchInfo entry");
OS << Sep << Succ->getName() << " (mispreds: " << BI->MispredictedCount
<< ", count: " << BI->Count << ")";
Sep = ", ";
++BI;
}
OS << '\n';
}
@ -307,6 +314,13 @@ bool BinaryFunction::buildCFG() {
auto &MIA = BC.MIA;
auto BranchDataOrErr = BC.DR.getFuncBranchData(getName());
if (std::error_code EC = BranchDataOrErr.getError()) {
DEBUG(dbgs() << "no branch data found for \"" << getName() << "\"\n");
} else {
ExecutionCount = BC.DR.countBranchesTo(getName());
}
if (!isSimple())
return false;
@ -378,9 +392,18 @@ bool BinaryFunction::buildCFG() {
BinaryBasicBlock *ToBB = getBasicBlockAtOffset(Branch.second);
assert(ToBB && "cannot find BB containing TO branch");
// TODO: add weights here.
//
FromBB->addSuccessor(ToBB);
if (std::error_code EC = BranchDataOrErr.getError()) {
FromBB->addSuccessor(ToBB);
} else {
const FuncBranchData &BranchData = BranchDataOrErr.get();
auto BranchInfoOrErr = BranchData.getBranch(Branch.first, Branch.second);
if (std::error_code EC = BranchInfoOrErr.getError()) {
FromBB->addSuccessor(ToBB);
} else {
const BranchInfo &BInfo = BranchInfoOrErr.get();
FromBB->addSuccessor(ToBB, BInfo.Branches, BInfo.Mispreds);
}
}
}
// Add fall-through branches.

View File

@ -18,6 +18,25 @@
namespace llvm {
namespace flo {
/// Looks up the recorded branch whose source offset is \p From and whose
/// target offset is \p To.
///
/// \returns a reference to the first entry of Data matching both offsets, or
/// errc::invalid_argument when no entry matches.
ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
                                                      uint64_t To) const {
  for (auto It = Data.begin(), End = Data.end(); It != End; ++It) {
    const bool SameEndpoints = It->From.Offset == From && It->To.Offset == To;
    if (SameEndpoints)
      return *It;
  }
  // Scanned every record without finding the requested pair of offsets.
  return make_error_code(llvm::errc::invalid_argument);
}
/// Adds up the Branches field of every entry in Data whose target is offset 0
/// of the function named \p FuncName.
///
/// \returns the accumulated count; 0 when no entry qualifies.
uint64_t FuncBranchData::countBranchesTo(StringRef FuncName) const {
  uint64_t Sum = 0;
  for (auto It = Data.begin(), End = Data.end(); It != End; ++It) {
    // Only branches landing on offset 0 of FuncName are counted.
    const bool HitsOffsetZero = It->To.Offset == 0 && It->To.Name == FuncName;
    if (HitsOffsetZero)
      Sum += It->Branches;
  }
  return Sum;
}
ErrorOr<std::unique_ptr<DataReader>>
DataReader::readPerfData(StringRef Path, raw_ostream &Diag) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
@ -148,16 +167,45 @@ std::error_code DataReader::parse() {
return EC;
Col = 0;
Line += 1;
BranchInfo BI = Res.get();
ParsedData.emplace_back(std::move(BI));
StringRef Name = BI.From.Name;
auto I = FuncsMap.find(Name);
if (I == FuncsMap.end()) {
FuncBranchData::ContainerTy Cont;
Cont.emplace_back(std::move(BI));
FuncsMap.insert(
std::make_pair(Name, FuncBranchData(Name, std::move(Cont))));
continue;
}
I->getValue().Data.emplace_back(std::move(BI));
}
return std::error_code();
}
void DataReader::dump() {
for (auto &BI : ParsedData) {
Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
/// Fetches the branch records stored in FuncsMap under \p FuncName.
///
/// \returns a reference to the matching FuncBranchData, or
/// errc::invalid_argument when FuncsMap holds no entry for \p FuncName.
ErrorOr<const FuncBranchData &>
DataReader::getFuncBranchData(StringRef FuncName) const {
  const auto Entry = FuncsMap.find(FuncName);
  if (Entry != FuncsMap.end())
    return Entry->getValue();
  return make_error_code(llvm::errc::invalid_argument);
}
/// Totals, over every function recorded in FuncsMap, the result of that
/// function's FuncBranchData::countBranchesTo(\p FuncName).
///
/// \returns the grand total across all entries of FuncsMap.
uint64_t DataReader::countBranchesTo(StringRef FuncName) const {
  uint64_t Sum = 0;
  for (auto It = FuncsMap.begin(), End = FuncsMap.end(); It != End; ++It)
    Sum += It->getValue().countBranchesTo(FuncName);
  return Sum;
}
void DataReader::dump() const {
for (const auto &Func : FuncsMap) {
for (const auto &BI : Func.getValue().Data) {
Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
}
}
}
}

View File

@ -46,12 +46,28 @@ struct BranchInfo {
Branches(Branches) {}
};
class FuncBranchData {
public:
typedef std::vector<BranchInfo> ContainerTy;
StringRef Name;
ContainerTy Data;
FuncBranchData(StringRef Name, ContainerTy Data)
: Name(Name), Data(std::move(Data)) {}
ErrorOr<const BranchInfo &> getBranch(uint64_t From, uint64_t To) const;
uint64_t countBranchesTo(StringRef FuncName) const;
};
//===----------------------------------------------------------------------===//
//
/// DataReader Class
///
class DataReader {
public:
explicit DataReader(raw_ostream &Diag) : Diag(Diag) {}
DataReader(std::unique_ptr<MemoryBuffer> MemBuf, raw_ostream &Diag)
: FileBuf(std::move(MemBuf)), Diag(Diag), ParsingBuf(FileBuf->getBuffer()),
Line(0), Col(0) {}
@ -77,8 +93,11 @@ public:
/// offset 12, with 4 mispredictions and 221 branches
std::error_code parse();
ErrorOr<const FuncBranchData &> getFuncBranchData(StringRef FuncName) const;
uint64_t countBranchesTo(StringRef FuncName) const;
/// Dumps the entire data structures parsed. Used for debugging.
void dump();
void dump() const;
private:
@ -90,15 +109,13 @@ private:
ErrorOr<BranchInfo> parseBranchInfo();
bool hasData();
// Owns reader data structures
BumpPtrAllocator Alloc;
// An in-memory copy of the input data file - owns strings used in reader
std::unique_ptr<MemoryBuffer> FileBuf;
raw_ostream &Diag;
StringRef ParsingBuf;
unsigned Line;
unsigned Col;
std::vector<BranchInfo> ParsedData;
StringMap<FuncBranchData> FuncsMap;
static const char FieldSeparator = ' ';
};

View File

@ -87,6 +87,10 @@ static cl::opt<bool>
DumpData("dump-data", cl::desc("dump parsed flo data (debugging)"),
cl::Hidden);
// Hidden debugging switch ("-dump-functions"). When set, OptimizeFile prints
// each function for which buildCFG() succeeds to errs() and then returns
// before running the optimization passes.
static cl::opt<bool>
DumpFunctions("dump-functions", cl::desc("dump parsed functions (debugging)"),
cl::Hidden);
static StringRef ToolName;
static void report_error(StringRef Message, std::error_code EC) {
@ -158,7 +162,7 @@ public:
/// triple \p TripleName.
static std::unique_ptr<BinaryContext> CreateBinaryContext(
std::string ArchName,
std::string TripleName) {
std::string TripleName, const DataReader &DR) {
std::string Error;
@ -255,17 +259,18 @@ static std::unique_ptr<BinaryContext> CreateBinaryContext(
std::move(MIA),
std::move(MRI),
std::move(DisAsm),
MAB);
MAB,
DR);
return BC;
}
static void OptimizeFile(ELFObjectFileBase *File) {
static void OptimizeFile(ELFObjectFileBase *File, const DataReader &DR) {
// FIXME: there should be some way to extract arch and triple information
// from the file.
std::unique_ptr<BinaryContext> BC =
std::move(CreateBinaryContext("x86-64", "x86_64-unknown-linux"));
std::move(CreateBinaryContext("x86-64", "x86_64-unknown-linux", DR));
if (!BC) {
errs() << "failed to create a binary context\n";
return;
@ -421,8 +426,12 @@ static void OptimizeFile(ELFObjectFileBase *File) {
if (!Function.buildCFG())
continue;
if (DumpFunctions)
Function.print(errs(), true);
} // Iterate over all functions
if (DumpFunctions)
return;
// Run optimization passes.
//
@ -693,18 +702,18 @@ int main(int argc, char **argv) {
if (!sys::fs::exists(InputFilename))
report_error(InputFilename, errc::no_such_file_or_directory);
std::unique_ptr<flo::DataReader> DR(new DataReader(errs()));
if (!InputDataFilename.empty()) {
if (!sys::fs::exists(InputDataFilename))
report_error(InputDataFilename, errc::no_such_file_or_directory);
// Attempt to read input flo data
ErrorOr<std::unique_ptr<flo::DataReader>> ReaderOrErr =
flo::DataReader::readPerfData(InputDataFilename, errs());
auto ReaderOrErr = flo::DataReader::readPerfData(InputDataFilename, errs());
if (std::error_code EC = ReaderOrErr.getError())
report_error(InputDataFilename, EC);
flo::DataReader &DR = *ReaderOrErr.get().get();
DR.reset(ReaderOrErr.get().release());
if (DumpData) {
DR.dump();
DR->dump();
return EXIT_SUCCESS;
}
}
@ -716,7 +725,7 @@ int main(int argc, char **argv) {
Binary &Binary = *BinaryOrErr.get().getBinary();
if (ELFObjectFileBase *e = dyn_cast<ELFObjectFileBase>(&Binary)) {
OptimizeFile(e);
OptimizeFile(e, *DR.get());
} else {
report_error(InputFilename, object_error::invalid_file_type);
}