forked from OSchip/llvm-project
269 lines
8.8 KiB
C++
269 lines
8.8 KiB
C++
#include "ThinLtoModuleIndex.h"
|
|
|
|
#include "llvm/Bitcode/BitcodeReader.h"
|
|
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
|
|
#include "llvm/IR/LLVMContext.h"
|
|
#include "llvm/IRReader/IRReader.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <memory>
|
|
#include <string>
|
|
|
|
#define DEBUG_TYPE "thinltojit"
|
|
|
|
namespace llvm {
|
|
namespace orc {
|
|
|
|
Error ThinLtoModuleIndex::add(StringRef InputPath) {
|
|
auto Buffer = errorOrToExpected(MemoryBuffer::getFile(InputPath));
|
|
if (!Buffer)
|
|
return Buffer.takeError();
|
|
|
|
Error ParseErr = readModuleSummaryIndex((*Buffer)->getMemBufferRef(),
|
|
CombinedSummaryIndex, NextModuleId);
|
|
if (ParseErr)
|
|
return ParseErr;
|
|
|
|
#ifndef NDEBUG
|
|
auto Paths = getAllModulePaths();
|
|
unsigned TotalPaths = Paths.size();
|
|
std::sort(Paths.begin(), Paths.end());
|
|
Paths.erase(std::unique(Paths.begin(), Paths.end()), Paths.end());
|
|
assert(TotalPaths == Paths.size() && "Module paths must be unique");
|
|
#endif
|
|
|
|
++NextModuleId;
|
|
return Error::success();
|
|
}
|
|
|
|
std::vector<StringRef> ThinLtoModuleIndex::getAllModulePaths() const {
|
|
auto ModuleTable = CombinedSummaryIndex.modulePaths();
|
|
|
|
std::vector<StringRef> Paths;
|
|
Paths.resize(ModuleTable.size());
|
|
|
|
for (const auto &KV : ModuleTable) {
|
|
assert(Paths[KV.second.first].empty() && "IDs are unique and continuous");
|
|
Paths[KV.second.first] = KV.first();
|
|
}
|
|
|
|
return Paths;
|
|
}
|
|
|
|
GlobalValueSummary *
|
|
ThinLtoModuleIndex::getSummary(GlobalValue::GUID Function) const {
|
|
ValueInfo VI = CombinedSummaryIndex.getValueInfo(Function);
|
|
if (!VI || VI.getSummaryList().empty())
|
|
return nullptr;
|
|
|
|
// There can be more than one symbol with the same GUID, in the case of same-
|
|
// named locals in different but same-named source files that were compiled in
|
|
// their respective directories (so the source file name and resulting GUID is
|
|
// the same). We avoid this by checking that module paths are unique upon
|
|
// add().
|
|
//
|
|
// TODO: We can still get duplicates on symbols declared with
|
|
// attribute((weak)), a GNU extension supported by gcc and clang.
|
|
// We should support it by looking for a symbol in the current module
|
|
// or in the same module as the caller.
|
|
assert(VI.getSummaryList().size() == 1 && "Weak symbols not yet supported");
|
|
|
|
return VI.getSummaryList().front().get()->getBaseObject();
|
|
}
|
|
|
|
Optional<StringRef>
|
|
ThinLtoModuleIndex::getModulePathForSymbol(StringRef Name) const {
|
|
if (GlobalValueSummary *S = getSummary(GlobalValue::getGUID(Name)))
|
|
return S->modulePath();
|
|
return None; // We don't know the symbol.
|
|
}
|
|
|
|
void ThinLtoModuleIndex::scheduleModuleParsingPrelocked(StringRef Path) {
|
|
// Once the module was scheduled, we can call takeModule().
|
|
auto ScheduledIt = ScheduledModules.find(Path);
|
|
if (ScheduledIt != ScheduledModules.end())
|
|
return;
|
|
|
|
auto Worker = [this](std::string Path) {
|
|
if (auto TSM = doParseModule(Path)) {
|
|
std::lock_guard<std::mutex> Lock(ParsedModulesLock);
|
|
ParsedModules[Path] = std::move(*TSM);
|
|
|
|
LLVM_DEBUG(dbgs() << "Finished parsing module: " << Path << "\n");
|
|
} else {
|
|
ES.reportError(TSM.takeError());
|
|
}
|
|
};
|
|
|
|
LLVM_DEBUG(dbgs() << "Schedule module for parsing: " << Path << "\n");
|
|
ScheduledModules[Path] = ParseModuleWorkers.async(Worker, Path.str());
|
|
}
|
|
|
|
ThreadSafeModule ThinLtoModuleIndex::takeModule(StringRef Path) {
|
|
std::unique_lock<std::mutex> ParseLock(ParsedModulesLock);
|
|
|
|
auto ParsedIt = ParsedModules.find(Path);
|
|
if (ParsedIt == ParsedModules.end()) {
|
|
ParseLock.unlock();
|
|
|
|
// The module is not ready, wait for the future we stored.
|
|
std::unique_lock<std::mutex> ScheduleLock(ScheduledModulesLock);
|
|
auto ScheduledIt = ScheduledModules.find(Path);
|
|
assert(ScheduledIt != ScheduledModules.end() &&
|
|
"Don't call for unscheduled modules");
|
|
std::shared_future<void> Future = ScheduledIt->getValue();
|
|
ScheduleLock.unlock();
|
|
Future.get();
|
|
|
|
ParseLock.lock();
|
|
ParsedIt = ParsedModules.find(Path);
|
|
assert(ParsedIt != ParsedModules.end() && "Must be ready now");
|
|
}
|
|
|
|
// We only add each module once. If it's not here anymore, we can skip it.
|
|
ThreadSafeModule TSM = std::move(ParsedIt->getValue());
|
|
ParsedIt->getValue() = ThreadSafeModule();
|
|
return TSM;
|
|
}
|
|
|
|
ThreadSafeModule ThinLtoModuleIndex::parseModuleFromFile(StringRef Path) {
|
|
{
|
|
std::lock_guard<std::mutex> ScheduleLock(ScheduledModulesLock);
|
|
scheduleModuleParsingPrelocked(Path);
|
|
}
|
|
return takeModule(Path);
|
|
}
|
|
|
|
Expected<ThreadSafeModule> ThinLtoModuleIndex::doParseModule(StringRef Path) {
|
|
// TODO: make a SMDiagnosticError class for this
|
|
SMDiagnostic Err;
|
|
auto Ctx = std::make_unique<LLVMContext>();
|
|
auto M = parseIRFile(Path, Err, *Ctx);
|
|
if (!M) {
|
|
std::string ErrDescription;
|
|
{
|
|
raw_string_ostream S(ErrDescription);
|
|
Err.print("ThinLtoJIT", S);
|
|
}
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Failed to load module from file '%s' (%s)",
|
|
Path.data(), ErrDescription.c_str());
|
|
}
|
|
|
|
return ThreadSafeModule(std::move(M), std::move(Ctx));
|
|
}
|
|
|
|
// We don't filter visited functions. Discovery will often be retriggered
|
|
// from the middle of already visited functions and it aims to reach a little
|
|
// further each time.
|
|
void ThinLtoModuleIndex::discoverCalleeModulePaths(FunctionSummary *S,
|
|
unsigned LookaheadLevels) {
|
|
// Populate initial worklist
|
|
std::vector<FunctionSummary *> Worklist;
|
|
addToWorklist(Worklist, S->calls());
|
|
unsigned Distance = 0;
|
|
|
|
while (++Distance < LookaheadLevels) {
|
|
// Process current worklist and populate a new one.
|
|
std::vector<FunctionSummary *> NextWorklist;
|
|
for (FunctionSummary *F : Worklist) {
|
|
updatePathRank(F->modulePath(), Distance);
|
|
addToWorklist(NextWorklist, F->calls());
|
|
}
|
|
Worklist = std::move(NextWorklist);
|
|
}
|
|
|
|
// Process the last worklist without filling a new one
|
|
for (FunctionSummary *F : Worklist) {
|
|
updatePathRank(F->modulePath(), Distance);
|
|
}
|
|
|
|
// Reset counts for known paths (includes both, scheduled and parsed modules).
|
|
std::lock_guard<std::mutex> Lock(ScheduledModulesLock);
|
|
for (const auto &KV : ScheduledModules) {
|
|
PathRank[KV.first()].Count = 0;
|
|
}
|
|
}
|
|
|
|
void ThinLtoModuleIndex::addToWorklist(
|
|
std::vector<FunctionSummary *> &List,
|
|
ArrayRef<FunctionSummary::EdgeTy> Calls) {
|
|
for (const auto &Edge : Calls) {
|
|
const auto &SummaryList = Edge.first.getSummaryList();
|
|
if (!SummaryList.empty()) {
|
|
GlobalValueSummary *S = SummaryList.front().get()->getBaseObject();
|
|
assert(isa<FunctionSummary>(S) && "Callees must be functions");
|
|
List.push_back(cast<FunctionSummary>(S));
|
|
}
|
|
}
|
|
}
|
|
|
|
// PathRank is global and continuous.
|
|
void ThinLtoModuleIndex::updatePathRank(StringRef Path, unsigned Distance) {
|
|
auto &Entry = PathRank[Path];
|
|
Entry.Count += 1;
|
|
Entry.MinDist = std::min(Entry.MinDist, Distance);
|
|
assert(Entry.MinDist > 0 && "We want it as a divisor");
|
|
};
|
|
|
|
// TODO: The size of a ThreadPool's task queue is not accessible. It would
|
|
// be great to know in order to estimate how many modules we schedule. The
|
|
// more we schedule, the less precise is the ranking. The less we schedule,
|
|
// the higher the risk for downtime.
|
|
std::vector<std::string> ThinLtoModuleIndex::selectNextPaths() {
|
|
struct ScorePath {
|
|
float Score;
|
|
unsigned MinDist;
|
|
StringRef Path;
|
|
};
|
|
|
|
std::vector<ScorePath> Candidates;
|
|
Candidates.reserve(PathRank.size());
|
|
for (const auto &KV : PathRank) {
|
|
float Score = static_cast<float>(KV.second.Count) / KV.second.MinDist;
|
|
if (Score > .0f) {
|
|
Candidates.push_back({Score, KV.second.MinDist, KV.first()});
|
|
}
|
|
}
|
|
|
|
// Sort candidates by descending score.
|
|
std::sort(Candidates.begin(), Candidates.end(),
|
|
[](const ScorePath &LHS, const ScorePath &RHS) {
|
|
return LHS.Score > RHS.Score;
|
|
});
|
|
|
|
// Sort highest score candidates by ascending minimal distance.
|
|
size_t Selected =
|
|
std::min(std::max<size_t>(NumParseModuleThreads, Candidates.size() / 2),
|
|
Candidates.size());
|
|
std::sort(Candidates.begin(), Candidates.begin() + Selected,
|
|
[](const ScorePath &LHS, const ScorePath &RHS) {
|
|
return LHS.MinDist < RHS.MinDist;
|
|
});
|
|
|
|
std::vector<std::string> Paths;
|
|
Paths.reserve(Selected);
|
|
for (unsigned i = 0; i < Selected; i++) {
|
|
Paths.push_back(Candidates[i].Path.str());
|
|
}
|
|
|
|
LLVM_DEBUG(dbgs() << "ModuleIndex: select " << Paths.size() << " out of "
|
|
<< Candidates.size() << " discovered paths\n");
|
|
|
|
return Paths;
|
|
}
|
|
|
|
unsigned ThinLtoModuleIndex::getNumDiscoveredModules() const {
|
|
// TODO: It would probably be more efficient to track the number of
|
|
// unscheduled modules.
|
|
unsigned NonNullItems = 0;
|
|
for (const auto &KV : PathRank)
|
|
if (KV.second.Count > 0)
|
|
++NonNullItems;
|
|
return NonNullItems;
|
|
}
|
|
|
|
} // namespace orc
|
|
} // namespace llvm
|