// llvm-project/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp
//
// Listing metadata of the original file: 269 lines, 8.8 KiB, C++.

#include "ThinLtoModuleIndex.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <string>
#define DEBUG_TYPE "thinltojit"
namespace llvm {
namespace orc {
/// Loads the file at \p InputPath and hands its contents, together with
/// CombinedSummaryIndex and NextModuleId, to readModuleSummaryIndex().
///
/// \returns the error from reading the file or from readModuleSummaryIndex()
/// if either fails; Error::success() otherwise. NextModuleId is incremented
/// only on success.
Error ThinLtoModuleIndex::add(StringRef InputPath) {
  auto FileBuffer = errorOrToExpected(MemoryBuffer::getFile(InputPath));
  if (!FileBuffer)
    return FileBuffer.takeError();

  if (Error ParseErr =
          readModuleSummaryIndex((*FileBuffer)->getMemBufferRef(),
                                 CombinedSummaryIndex, NextModuleId))
    return ParseErr;

#ifndef NDEBUG
  // Debug-only sanity check: de-duplicating the sorted list of module paths
  // must not shrink it, i.e. no path was registered twice.
  auto SortedPaths = getAllModulePaths();
  unsigned CountBeforeDedup = SortedPaths.size();
  std::sort(SortedPaths.begin(), SortedPaths.end());
  auto FirstDuplicate = std::unique(SortedPaths.begin(), SortedPaths.end());
  SortedPaths.erase(FirstDuplicate, SortedPaths.end());
  assert(CountBeforeDedup == SortedPaths.size() &&
         "Module paths must be unique");
#endif

  ++NextModuleId;
  return Error::success();
}
/// Collects the path of every module in CombinedSummaryIndex's module table.
///
/// Each path is stored at the vector index given by its module ID
/// (KV.second.first), so the result is only well-formed when IDs are unique
/// and form a contiguous range starting at 0.
///
/// \returns one StringRef per module. The StringRefs refer to the key storage
/// of the index's module table and must not outlive it.
std::vector<StringRef> ThinLtoModuleIndex::getAllModulePaths() const {
  // Bind by reference. A plain `auto` would make a local copy of the whole
  // table, and the StringRefs handed out below would then point into that
  // copy, which is destroyed when this function returns.
  const auto &ModuleTable = CombinedSummaryIndex.modulePaths();

  std::vector<StringRef> Paths;
  Paths.resize(ModuleTable.size());
  for (const auto &KV : ModuleTable) {
    assert(Paths[KV.second.first].empty() && "IDs are unique and continuous");
    Paths[KV.second.first] = KV.first();
  }
  return Paths;
}
/// Finds the summary registered in CombinedSummaryIndex for the GUID
/// \p Function.
///
/// \returns the base object of the first summary in the GUID's summary list,
/// or nullptr if the index has no value info or an empty summary list for it.
GlobalValueSummary *
ThinLtoModuleIndex::getSummary(GlobalValue::GUID Function) const {
  ValueInfo Info = CombinedSummaryIndex.getValueInfo(Function);
  if (!Info)
    return nullptr;

  const auto &Summaries = Info.getSummaryList();
  if (Summaries.empty())
    return nullptr;

  // Several symbols may share one GUID: same-named locals in different but
  // same-named source files that were each compiled in their own directory
  // end up with the same source file name and thus the same GUID. add()
  // guards against that by checking that module paths are unique.
  //
  // TODO: Duplicates are still possible for symbols declared with
  // attribute((weak)), a GNU extension supported by gcc and clang.
  // Supporting it means looking for the symbol in the current module or in
  // the same module as the caller.
  assert(Summaries.size() == 1 && "Weak symbols not yet supported");
  return Summaries.front()->getBaseObject();
}
/// Maps the symbol \p Name to the path of the module whose summary defines it.
///
/// \returns the summary's module path, or None when getSummary() finds no
/// summary for the GUID computed from \p Name.
Optional<StringRef>
ThinLtoModuleIndex::getModulePathForSymbol(StringRef Name) const {
  GlobalValueSummary *Summary = getSummary(GlobalValue::getGUID(Name));
  if (!Summary)
    return None; // We don't know the symbol.
  return Summary->modulePath();
}
/// Queues an asynchronous parse of the module at \p Path unless one has
/// already been scheduled. Once a module is scheduled, takeModule() may be
/// called for it.
///
/// This function reads and writes ScheduledModules without taking a lock;
/// parseModuleFromFile() holds ScheduledModulesLock around the call.
void ThinLtoModuleIndex::scheduleModuleParsingPrelocked(StringRef Path) {
  // Each path is scheduled at most once.
  if (ScheduledModules.find(Path) != ScheduledModules.end())
    return;

  // The task receives its own std::string copy of the path. On success the
  // parsed module is published in ParsedModules under ParsedModulesLock; on
  // failure the error is reported to the execution session.
  auto ParseTask = [this](std::string ModulePath) {
    auto ParsedOrErr = doParseModule(ModulePath);
    if (!ParsedOrErr) {
      ES.reportError(ParsedOrErr.takeError());
      return;
    }
    std::lock_guard<std::mutex> Guard(ParsedModulesLock);
    ParsedModules[ModulePath] = std::move(*ParsedOrErr);
    LLVM_DEBUG(dbgs() << "Finished parsing module: " << ModulePath << "\n");
  };

  LLVM_DEBUG(dbgs() << "Schedule module for parsing: " << Path << "\n");
  ScheduledModules[Path] = ParseModuleWorkers.async(ParseTask, Path.str());
}
/// Transfers ownership of the parsed module for \p Path to the caller.
///
/// If the module is not yet in ParsedModules, this blocks on the future that
/// was stored in ScheduledModules when the parse was scheduled. \p Path must
/// therefore have been scheduled beforehand.
///
/// \returns the parsed module, or a null ThreadSafeModule when
///  - the module was already taken by an earlier call (its slot is reset to a
///    null module on the first take), or
///  - parsing failed: the worker reports the error to the execution session
///    and publishes nothing, so there is no module to hand out.
ThreadSafeModule ThinLtoModuleIndex::takeModule(StringRef Path) {
  std::unique_lock<std::mutex> ParseLock(ParsedModulesLock);

  auto ParsedIt = ParsedModules.find(Path);
  if (ParsedIt == ParsedModules.end()) {
    // Do not hold the parsed-modules lock while waiting: the worker needs it
    // to publish its result.
    ParseLock.unlock();

    // The module is not ready, wait for the future we stored.
    std::unique_lock<std::mutex> ScheduleLock(ScheduledModulesLock);
    auto ScheduledIt = ScheduledModules.find(Path);
    assert(ScheduledIt != ScheduledModules.end() &&
           "Don't call for unscheduled modules");
    std::shared_future<void> Future = ScheduledIt->getValue();
    ScheduleLock.unlock();

    Future.get();

    ParseLock.lock();
    ParsedIt = ParsedModules.find(Path);

    // A finished worker does not imply a parsed module: on a parse failure
    // nothing was stored. Dereferencing end() here would be undefined
    // behavior, so hand back a null module instead.
    if (ParsedIt == ParsedModules.end())
      return ThreadSafeModule();
  }

  // We only add each module once. If it's not here anymore, we can skip it.
  ThreadSafeModule TSM = std::move(ParsedIt->getValue());
  ParsedIt->getValue() = ThreadSafeModule();
  return TSM;
}
/// Makes sure a parse of the module at \p Path is scheduled and then returns
/// whatever takeModule() yields for it.
ThreadSafeModule ThinLtoModuleIndex::parseModuleFromFile(StringRef Path) {
  // Scheduling requires ScheduledModulesLock. Release it before takeModule(),
  // which acquires that lock itself when it has to wait.
  std::unique_lock<std::mutex> ScheduleGuard(ScheduledModulesLock);
  scheduleModuleParsingPrelocked(Path);
  ScheduleGuard.unlock();

  return takeModule(Path);
}
/// Parses the IR file at \p Path into a module that lives in a freshly
/// created LLVMContext.
///
/// \returns a ThreadSafeModule owning both the module and its context, or a
/// string error containing the path and the printed parser diagnostic when
/// parseIRFile() fails.
Expected<ThreadSafeModule> ThinLtoModuleIndex::doParseModule(StringRef Path) {
  // TODO: make a SMDiagnosticError class for this
  SMDiagnostic Err;
  auto Ctx = std::make_unique<LLVMContext>();
  auto M = parseIRFile(Path, Err, *Ctx);
  if (!M) {
    std::string ErrDescription;
    {
      // Scoped so the stream is flushed into ErrDescription before use.
      raw_string_ostream S(ErrDescription);
      Err.print("ThinLtoJIT", S);
    }
    // A StringRef is not guaranteed to be NUL-terminated, so its data()
    // pointer must not be fed to "%s". Format a std::string copy instead; the
    // temporary lives until the end of this full expression.
    return createStringError(inconvertibleErrorCode(),
                             "Failed to load module from file '%s' (%s)",
                             Path.str().c_str(), ErrDescription.c_str());
  }
  return ThreadSafeModule(std::move(M), std::move(Ctx));
}
/// Ranks the modules of the functions reachable from \p S through call edges.
///
/// The call graph is walked level by level, starting with the direct callees
/// of \p S at distance 1. Every function on a level has its module path
/// ranked with the current distance. Direct callees are always ranked; deeper
/// levels are visited while the distance stays below \p LookaheadLevels.
///
/// Visited functions are deliberately not filtered: discovery is often
/// retriggered from the middle of already visited functions and is meant to
/// reach a little further each time.
void ThinLtoModuleIndex::discoverCalleeModulePaths(FunctionSummary *S,
                                                   unsigned LookaheadLevels) {
  // Level 1: the direct callees.
  std::vector<FunctionSummary *> CurrentLevel;
  addToWorklist(CurrentLevel, S->calls());

  // Rank the current level and expand it into the next one for as long as
  // further lookahead is allowed.
  unsigned Distance = 1;
  for (; Distance < LookaheadLevels; ++Distance) {
    std::vector<FunctionSummary *> NextLevel;
    for (FunctionSummary *Callee : CurrentLevel) {
      updatePathRank(Callee->modulePath(), Distance);
      addToWorklist(NextLevel, Callee->calls());
    }
    CurrentLevel.swap(NextLevel);
  }

  // The last level is ranked but not expanded any further.
  for (FunctionSummary *Callee : CurrentLevel)
    updatePathRank(Callee->modulePath(), Distance);

  // Reset counts for known paths (includes both, scheduled and parsed modules).
  std::lock_guard<std::mutex> Guard(ScheduledModulesLock);
  for (const auto &Scheduled : ScheduledModules)
    PathRank[Scheduled.first()].Count = 0;
}
/// Appends to \p List the function summary behind every call edge in
/// \p Calls. Edges whose callee has an empty summary list are skipped; for
/// the others, the base object of the first summary is used.
void ThinLtoModuleIndex::addToWorklist(
    std::vector<FunctionSummary *> &List,
    ArrayRef<FunctionSummary::EdgeTy> Calls) {
  for (const auto &CallEdge : Calls) {
    const auto &CalleeSummaries = CallEdge.first.getSummaryList();
    if (CalleeSummaries.empty())
      continue;

    GlobalValueSummary *Callee = CalleeSummaries.front()->getBaseObject();
    assert(isa<FunctionSummary>(Callee) && "Callees must be functions");
    List.push_back(cast<FunctionSummary>(Callee));
  }
}
/// Records one more sighting of the module at \p Path at call distance
/// \p Distance: bumps its counter and lowers its minimal distance if
/// \p Distance is smaller. Creates the PathRank entry if there is none yet.
///
/// PathRank is global and continuous.
void ThinLtoModuleIndex::updatePathRank(StringRef Path, unsigned Distance) {
  auto &Rank = PathRank[Path];
  ++Rank.Count;
  if (Distance < Rank.MinDist)
    Rank.MinDist = Distance;
  // selectNextPaths() divides by MinDist.
  assert(Rank.MinDist > 0 && "We want it as a divisor");
}
// TODO: The size of a ThreadPool's task queue is not accessible. It would
// be great to know in order to estimate how many modules we schedule. The
// more we schedule, the less precise is the ranking. The less we schedule,
// the higher the risk for downtime.
std::vector<std::string> ThinLtoModuleIndex::selectNextPaths() {
struct ScorePath {
float Score;
unsigned MinDist;
StringRef Path;
};
std::vector<ScorePath> Candidates;
Candidates.reserve(PathRank.size());
for (const auto &KV : PathRank) {
float Score = static_cast<float>(KV.second.Count) / KV.second.MinDist;
if (Score > .0f) {
Candidates.push_back({Score, KV.second.MinDist, KV.first()});
}
}
// Sort candidates by descending score.
std::sort(Candidates.begin(), Candidates.end(),
[](const ScorePath &LHS, const ScorePath &RHS) {
return LHS.Score > RHS.Score;
});
// Sort highest score candidates by ascending minimal distance.
size_t Selected =
std::min(std::max<size_t>(NumParseModuleThreads, Candidates.size() / 2),
Candidates.size());
std::sort(Candidates.begin(), Candidates.begin() + Selected,
[](const ScorePath &LHS, const ScorePath &RHS) {
return LHS.MinDist < RHS.MinDist;
});
std::vector<std::string> Paths;
Paths.reserve(Selected);
for (unsigned i = 0; i < Selected; i++) {
Paths.push_back(Candidates[i].Path.str());
}
LLVM_DEBUG(dbgs() << "ModuleIndex: select " << Paths.size() << " out of "
<< Candidates.size() << " discovered paths\n");
return Paths;
}
/// Counts the PathRank entries whose Count is greater than zero.
unsigned ThinLtoModuleIndex::getNumDiscoveredModules() const {
  // TODO: It would probably be more efficient to track the number of
  // unscheduled modules.
  unsigned Discovered = 0;
  for (const auto &Ranked : PathRank)
    Discovered += (Ranked.second.Count > 0) ? 1u : 0u;
  return Discovered;
}
} // namespace orc
} // namespace llvm