Add initial implementation of DataReader

Summary:
This patch introduces DataReader, a module responsible for
parsing llvm flo data files into in-memory data structures.

(cherry picked from FBD2515754)
This commit is contained in:
Rafael Auler 2015-10-05 18:31:25 -07:00 committed by Maksim Panchenko
parent 9a2fe7ebe4
commit e1a539b0ec
4 changed files with 294 additions and 0 deletions

View File

@ -14,4 +14,5 @@ add_llvm_tool(llvm-flo
llvm-flo.cpp
BinaryBasicBlock.cpp
BinaryFunction.cpp
DataReader.cpp
)

164
bolt/DataReader.cpp Normal file
View File

@ -0,0 +1,164 @@
//===-- DataReader.cpp - Perf data reader -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This family of functions reads profile data written by the perf2flo
// utility and stores it in memory for llvm-flo consumption.
//
//===----------------------------------------------------------------------===//
#include "DataReader.h"
namespace llvm {
namespace flo {
/// Loads the flo data file at \p Path into memory (the buffer is obtained with
/// MemoryBuffer::getFileOrSTDIN), wraps it in a DataReader and parses it.
///
/// \param Path  location of the profile data to read.
/// \param Diag  stream that receives human-readable diagnostics.
/// \returns the populated reader on success; otherwise the error code produced
///          while opening the input or while parsing it.
ErrorOr<std::unique_ptr<DataReader>>
DataReader::readPerfData(StringRef Path, raw_ostream &Diag) {
  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
      MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = MB.getError()) {
    Diag << "Cannot open " << Path << ": " << EC.message() << "\n";
    // Stop here: MB holds an error, so MB.get() must not be called and there
    // is no buffer for the DataReader constructor to dereference.
    return EC;
  }

  auto DR = make_unique<DataReader>(std::move(MB.get()), Diag);

  // parse() has already written a line/column diagnostic to Diag when it
  // fails; hand the error code to the caller instead of dropping it.
  if (std::error_code EC = DR->parse())
    return EC;

  // The explicit move selects the converting ErrorOr constructor for the
  // move-only unique_ptr.
  return std::move(DR);
}
/// Writes \p ErrorMsg to the diagnostic stream, prefixed with the line and
/// column the parser is currently positioned at.
void DataReader::reportError(StringRef ErrorMsg) {
  Diag << "Error reading flo data input file: line " << Line;
  Diag << ", column " << Col;
  Diag << ": " << ErrorMsg << '\n';
}
/// Consumes a single field separator at the current parsing position.
///
/// \returns true when a separator was present and has been skipped; false
///          (after reporting an error) when the next character is something
///          else or the input is exhausted.
bool DataReader::expectAndConsumeFS() {
  // Check for exhaustion first: indexing an empty StringRef is out of bounds,
  // which can happen when the input is truncated right after a flag field.
  if (ParsingBuf.empty() || ParsingBuf[0] != FieldSeparator) {
    reportError("expected field separator");
    return false;
  }
  ParsingBuf = ParsingBuf.drop_front(1);
  Col += 1;
  return true;
}
/// Extracts the text between the current position and the next occurrence of
/// \p EndChar, then advances past that terminator.
///
/// \returns the extracted field, or io_error (after reporting "malformed
///          field") when the terminator is missing or the field is empty.
ErrorOr<StringRef> DataReader::parseString(char EndChar) {
  const size_t FieldLen = ParsingBuf.find(EndChar);
  const bool HasTerminator = FieldLen != StringRef::npos;
  if (!HasTerminator || FieldLen == 0) {
    reportError("malformed field");
    return make_error_code(llvm::errc::io_error);
  }

  // The terminator is consumed together with the field itself.
  const size_t Consumed = FieldLen + 1;
  StringRef Field = ParsingBuf.substr(0, FieldLen);
  ParsingBuf = ParsingBuf.drop_front(Consumed);
  Col += Consumed;
  return Field;
}
/// Reads one field terminated by \p EndChar and interprets it as a base-10
/// signed integer.
///
/// \returns the parsed value, the error from parseString() when the field
///          could not be extracted, or io_error when the text is not a
///          decimal number (the offending text is echoed to Diag).
ErrorOr<int64_t> DataReader::parseNumberField(char EndChar) {
  ErrorOr<StringRef> FieldOrErr = parseString(EndChar);
  if (!FieldOrErr)
    return FieldOrErr.getError();

  const StringRef Digits = *FieldOrErr;
  int64_t Value;
  // getAsInteger() returns false on success.
  if (!Digits.getAsInteger(10, Value))
    return Value;

  reportError("expected decimal number");
  Diag << "Found: " << Digits << "\n";
  return make_error_code(llvm::errc::io_error);
}
/// Parses one location triple: "<0|1> <name> <hex offset> ". Every field,
/// including the offset, must be followed by a field separator.
///
/// \returns the parsed Location, or an error code after reporting which part
///          of the triple was malformed.
ErrorOr<Location> DataReader::parseLocation() {
  // Read whether the location of the branch should be DSO or a symbol.
  // Guard against exhausted input before indexing: the second location of a
  // record may start at end of buffer when the file is truncated.
  if (ParsingBuf.empty() || (ParsingBuf[0] != '0' && ParsingBuf[0] != '1')) {
    reportError("expected 0 or 1");
    return make_error_code(llvm::errc::io_error);
  }
  bool IsSymbol = ParsingBuf[0] == '1';
  ParsingBuf = ParsingBuf.drop_front(1);
  Col += 1;

  if (!expectAndConsumeFS())
    return make_error_code(llvm::errc::io_error);

  // Read the string containing the symbol or the DSO name
  auto NameRes = parseString(FieldSeparator);
  if (std::error_code EC = NameRes.getError())
    return EC;
  StringRef Name = NameRes.get();

  // Read the offset, which is written in hexadecimal
  auto OffsetStrRes = parseString(FieldSeparator);
  if (std::error_code EC = OffsetStrRes.getError())
    return EC;
  StringRef OffsetStr = OffsetStrRes.get();
  uint64_t Offset;
  // getAsInteger() returns true on failure.
  if (OffsetStr.getAsInteger(16, Offset)) {
    reportError("expected hexadecimal number");
    Diag << "Found: " << OffsetStr << "\n";
    return make_error_code(llvm::errc::io_error);
  }

  return Location(IsSymbol, Name, Offset);
}
/// Parses one complete record: a FROM location, a TO location, the number of
/// mispredictions and, terminated by a newline, the number of branches.
///
/// \returns the assembled BranchInfo, or the error code of the first field
///          that failed to parse.
ErrorOr<BranchInfo> DataReader::parseBranchInfo() {
  ErrorOr<Location> FromOrErr = parseLocation();
  if (!FromOrErr)
    return FromOrErr.getError();

  ErrorOr<Location> ToOrErr = parseLocation();
  if (!ToOrErr)
    return ToOrErr.getError();

  ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator);
  if (!MispredsOrErr)
    return MispredsOrErr.getError();

  // The branch count is the last field of the record, so it ends at '\n'.
  ErrorOr<int64_t> BranchesOrErr = parseNumberField('\n');
  if (!BranchesOrErr)
    return BranchesOrErr.getError();

  return BranchInfo(*FromOrErr, *ToOrErr, *MispredsOrErr, *BranchesOrErr);
}
/// Tells whether another record starts at the current parsing position, i.e.
/// whether the remaining input is non-empty and begins with '0' or '1'.
bool DataReader::hasData() {
  if (ParsingBuf.empty())
    return false;
  const char Lead = ParsingBuf[0];
  return Lead == '0' || Lead == '1';
}
std::error_code DataReader::parse() {
Col = 0;
Line = 1;
while (hasData()) {
auto Res = parseBranchInfo();
if (std::error_code EC = Res.getError())
return EC;
Col = 0;
Line += 1;
BranchInfo BI = Res.get();
ParsedData.emplace_back(std::move(BI));
}
return std::error_code();
}
void DataReader::dump() {
for (auto &BI : ParsedData) {
Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " "
<< BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n";
}
}
}
}

110
bolt/DataReader.h Normal file
View File

@ -0,0 +1,110 @@
//===-- DataReader.h - Perf data reader -------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This family of functions reads profile data written by the perf2flo
// utility and stores it in memory for llvm-flo consumption.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_FLO_DATA_READER_H
#define LLVM_TOOLS_LLVM_FLO_DATA_READER_H
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace flo {
/// One endpoint of a recorded branch: a name plus an offset relative to it.
struct Location {
  /// True when Name is an ELF symbol, false when it is a DSO name.
  bool IsSymbol;

  /// Symbol or DSO name. This is a non-owning view; the text it refers to
  /// must outlive the Location.
  StringRef Name;

  /// Offset of the address relative to Name.
  uint64_t Offset;

  Location(bool IsSymbol, StringRef Name, uint64_t Offset)
      : IsSymbol(IsSymbol),
        Name(Name),
        Offset(Offset) {}
};
/// One profile record: a branch between two locations together with its
/// misprediction and execution counts.
struct BranchInfo {
  /// Where the branch originates.
  Location From;

  /// Where the branch lands.
  Location To;

  /// Number of mispredictions recorded for this branch.
  int64_t Mispreds;

  /// Number of times this branch was recorded.
  int64_t Branches;

  // Location holds only a bool, a StringRef and an integer, so copying the
  // by-value parameters is exactly what moving them would do.
  BranchInfo(Location From, Location To, int64_t Mispreds, int64_t Branches)
      : From(From),
        To(To),
        Mispreds(Mispreds),
        Branches(Branches) {}
};
//===----------------------------------------------------------------------===//
//
/// DataReader Class
///
/// Keeps a flo profile data file in memory and parses it into a list of
/// BranchInfo records. Diagnostics are written to the raw_ostream supplied at
/// construction time.
///
class DataReader {
public:
/// Takes ownership of \p MemBuf and positions the parsing cursor at the start
/// of its contents. \p MemBuf is dereferenced here, so it must not be null.
/// \p Diag is stored by reference and receives all diagnostics.
DataReader(std::unique_ptr<MemoryBuffer> MemBuf, raw_ostream &Diag)
: FileBuf(std::move(MemBuf)), Diag(Diag), ParsingBuf(FileBuf->getBuffer()),
Line(0), Col(0) {}
/// Loads the data at \p Path with MemoryBuffer::getFileOrSTDIN, creates a
/// DataReader for it and runs parse(). A failure to open the input is
/// reported on \p Diag.
static ErrorOr<std::unique_ptr<DataReader>> readPerfData(StringRef Path,
raw_ostream &Diag);
/// Parses the input flo data file into internal data structures. We expect
/// the file format to follow the syntax below.
///
/// <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
/// <is symbol?> <closest elf symbol or DSO name> <relative TO address>
/// <number of mispredictions> <number of branches>
///
/// In <is symbol?> field we record 0 if our closest address is a DSO load
/// address or 1 if our closest address is an ELF symbol.
///
/// All fields of a record sit on one line, separated by single spaces, and
/// the record ends with a newline. Addresses are hexadecimal; the two counts
/// are decimal.
///
/// Example:
///
/// 1 main 3fb 0 /lib/ld-2.21.so 12 4 221
///
/// The example records branches from symbol main, offset 3fb, to DSO ld-2.21,
/// offset 12, with 4 mispredictions and 221 branches
///
/// Parsing stops when the input is exhausted or the next line does not begin
/// with '0' or '1'. Returns the error code of the first malformed record, or
/// a default-constructed error code when none was found.
std::error_code parse();
/// Dumps the entire data structures parsed. Used for debugging.
/// Output goes to the diagnostic stream, one record per line.
void dump();
private:
/// Writes \p ErrorMsg to Diag, prefixed with the current Line and Col.
void reportError(StringRef ErrorMsg);
/// Skips one field separator; reports an error and returns false if the next
/// character is not a separator.
bool expectAndConsumeFS();
/// Returns the text up to the next \p EndChar and consumes it together with
/// the terminator. A missing terminator or an empty field is an error.
ErrorOr<StringRef> parseString(char EndChar);
/// Reads a field terminated by \p EndChar as a decimal integer.
ErrorOr<int64_t> parseNumberField(char EndChar);
/// Reads "<is symbol?> <name> <hex offset>" followed by a field separator.
ErrorOr<Location> parseLocation();
/// Reads one full record: two locations followed by the two counts.
ErrorOr<BranchInfo> parseBranchInfo();
/// True when the remaining input is non-empty and starts with '0' or '1'.
bool hasData();
// Owns reader data structures
// NOTE(review): not referenced by any of the reader code visible in this
// patch - confirm whether it is still needed.
BumpPtrAllocator Alloc;
// An in-memory copy of the input data file - owns strings used in reader
std::unique_ptr<MemoryBuffer> FileBuf;
// Destination for error messages and for dump() output.
raw_ostream &Diag;
// The not-yet-consumed tail of FileBuf's contents.
StringRef ParsingBuf;
// Line of the record being parsed; parse() starts counting at 1.
unsigned Line;
// Number of characters consumed so far on the current line.
unsigned Col;
// Records accumulated by parse(), in file order.
std::vector<BranchInfo> ParsedData;
// Character that separates the fields of a record.
static const char FieldSeparator = ' ';
};
}
}
#endif

View File

@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
#include "DataReader.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
@ -82,6 +83,10 @@ EliminateUnreachable("eliminate-unreachable",
cl::desc("eliminate unreachable code"),
cl::Optional);
// Hidden debugging switch: when set, main() dumps the parsed flo data and
// exits successfully without going on to open the binary.
static cl::opt<bool>
DumpData("dump-data", cl::desc("dump parsed flo data (debugging)"),
cl::Hidden);
static StringRef ToolName;
static void report_error(StringRef Message, std::error_code EC) {
@ -687,6 +692,20 @@ int main(int argc, char **argv) {
if (!sys::fs::exists(InputFilename))
report_error(InputFilename, errc::no_such_file_or_directory);
if (!sys::fs::exists(InputDataFilename))
report_error(InputDataFilename, errc::no_such_file_or_directory);
// Attempt to read input flo data
ErrorOr<std::unique_ptr<flo::DataReader>> ReaderOrErr =
flo::DataReader::readPerfData(InputDataFilename, errs());
if (std::error_code EC = ReaderOrErr.getError())
report_error(InputDataFilename, EC);
flo::DataReader &DR = *ReaderOrErr.get().get();
if (DumpData) {
DR.dump();
return EXIT_SUCCESS;
}
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(InputFilename);
if (std::error_code EC = BinaryOrErr.getError())