llvm-project/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer...

226 lines
8.1 KiB
C++

#include "ThinLtoInstrumentationLayer.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
#include <cstdlib>
#define DEBUG_TYPE "thinltojit"
namespace llvm {
namespace orc {
// TODO: Fixed set of flags may not always be enough. Make this expandable.
void ThinLtoInstrumentationLayer::allocateDiscoveryFlags(unsigned MinFlags) {
// Round up to full memory pages.
unsigned PageSize = sys::Process::getPageSizeEstimate();
unsigned NumPagesEach = (MinFlags + (PageSize - 1)) / PageSize;
unsigned NumPagesTotal = 2 * NumPagesEach;
assert(isPowerOf2_64(PageSize) && "Adjust aligned memory alloc below");
// Allocate one more page to make up for size loss due to alignment.
void *Storage = std::calloc(NumPagesTotal + 1, PageSize);
uint64_t StorageAddr = reinterpret_cast<uint64_t>(Storage);
uint64_t PageSizeDecr = PageSize - 1;
uint64_t AlignedAddr = ((StorageAddr + PageSizeDecr) & ~PageSizeDecr);
uint64_t Diff = AlignedAddr - StorageAddr;
// For each flag we allocate one byte in each location: Incoming and Handled.
// TODO: 'Handled' could be a bitset, but size must be dynamic
NumFlagsUsed.store(0);
NumFlagsAllocated = NumPagesEach * PageSize;
FlagsStorage = static_cast<uint8_t *>(Storage);
FlagsIncoming = reinterpret_cast<Flag *>(FlagsStorage + Diff);
FlagsHandled = FlagsIncoming + NumFlagsAllocated;
static_assert(sizeof(FlagsIncoming[0]) == sizeof(uint8_t), "Flags are bytes");
assert(reinterpret_cast<uint64_t>(FlagsIncoming) % PageSize == 0);
assert(reinterpret_cast<uint64_t>(FlagsHandled) % PageSize == 0);
assert(NumFlagsAllocated >= MinFlags);
}
// Reserve a new set of discovery flags and return the index of the first one.
unsigned ThinLtoInstrumentationLayer::reserveDiscoveryFlags(unsigned Count) {
#ifndef NDEBUG
for (unsigned i = NumFlagsUsed.load(), e = i + Count; i < e; i++) {
assert(FlagsIncoming[i] == Clear);
}
#endif
assert(Count > 0);
return NumFlagsUsed.fetch_add(Count);
}
/// Record which function GUID owns each flag in a consecutive range.
///
/// Guids[k] becomes the owner of flag index FirstIdx + k. The owner map is
/// modified while holding DiscoveryFlagsInfoLock.
///
/// \param Guids    Owners, in flag order.
/// \param FirstIdx Index of the flag that belongs to Guids[0].
void ThinLtoInstrumentationLayer::registerDiscoveryFlagOwners(
    std::vector<GlobalValue::GUID> Guids, unsigned FirstIdx) {
  const unsigned NumOwners = Guids.size();

  std::lock_guard<std::mutex> Guard(DiscoveryFlagsInfoLock);
  for (unsigned Offset = 0; Offset != NumOwners; ++Offset) {
    const unsigned FlagIdx = FirstIdx + Offset;
    assert(!FlagOwnersMap.count(FlagIdx) &&
           "Flag should not have an owner at this point");
    FlagOwnersMap[FlagIdx] = Guids[Offset];
  }
}
/// Collect the indexes of all issued flags that fired since the last call.
///
/// A flag is reported when its 'Incoming' byte is Fired and its 'Handled' byte
/// is still Clear; reporting it marks it as handled so it is returned once.
///
/// \returns Indexes of newly fired flags in ascending order.
std::vector<unsigned> ThinLtoInstrumentationLayer::takeFlagsThatFired() {
  // Acquire side of the FlagsSync handshake. The loaded value is irrelevant
  // and this only has an effect if the writers did their Release store.
  (void)FlagsSync.load(std::memory_order_acquire);

  const unsigned NumIssued = NumFlagsUsed.load();
  std::vector<unsigned> FiredIndexes;
  for (unsigned Idx = 0; Idx != NumIssued; ++Idx) {
    if (FlagsIncoming[Idx] != Fired)
      continue;
    if (FlagsHandled[Idx] != Clear)
      continue;

    FlagsHandled[Idx] = Fired;
    FiredIndexes.push_back(Idx);
  }
  return FiredIndexes;
}
std::vector<GlobalValue::GUID>
ThinLtoInstrumentationLayer::takeFlagOwners(std::vector<unsigned> Indexes) {
std::vector<GlobalValue::GUID> ReachedFunctions;
std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock);
for (unsigned i : Indexes) {
auto KV = FlagOwnersMap.find(i);
assert(KV != FlagOwnersMap.end());
ReachedFunctions.push_back(KV->second);
FlagOwnersMap.erase(KV);
}
return ReachedFunctions;
}
/// Feed a set of functions into discovery without waiting for them to run.
///
/// One synthetic flag is reserved per function, its owner is registered and
/// the flag is immediately marked as Fired, so the next scan of the flags
/// reports these functions as reached.
///
/// \param Functions GUIDs of the functions to report; may be empty, in which
///                  case nothing happens.
void ThinLtoInstrumentationLayer::nudgeIntoDiscovery(
    std::vector<GlobalValue::GUID> Functions) {
  unsigned Count = Functions.size();

  // Nothing to announce. Reserving zero flags is not allowed (it is asserted
  // in reserveDiscoveryFlags), so bail out before touching any state.
  if (Count == 0)
    return;

  // Registering synthetic flags in advance. We expect them to get processed
  // before the respective functions get emitted. emit() reserves separate
  // flags for everything it instruments, so the two never share a flag.
  unsigned FirstFlagIdx = reserveDiscoveryFlags(Count);
  registerDiscoveryFlagOwners(std::move(Functions), FirstFlagIdx);

  // Initialize the flags as fired and force a cache sync, so discovery will
  // pick them up as soon as possible.
  for (unsigned i = FirstFlagIdx; i < FirstFlagIdx + Count; i++) {
    FlagsIncoming[i] = Fired;
  }

  if (MemFence & ThinLtoJIT::FenceStaticCode) {
    // Release side of the FlagsSync handshake; the stored value is irrelevant.
    FlagsSync.store(0, std::memory_order_release);
  }

  LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n");
}
/// Instrument every function definition in the module, then forward the
/// module to the base layer.
///
/// Each definition gets a new entry block that sets the function's discovery
/// flag and then branches to the previous entry block. The GUIDs of the
/// instrumented functions are registered as owners of the reserved flags.
///
/// \param R   Materialization responsibility, passed on to the base layer.
/// \param TSM Module to instrument and emit.
void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R,
                                       ThreadSafeModule TSM) {
  TSM.withModuleDo([this](Module &M) {
    // We may have discovered ahead of some functions already, but we still
    // instrument them all. Their notifications steer the future direction of
    // discovery.
    std::vector<Function *> Definitions;
    for (Function &Fn : M.getFunctionList()) {
      if (Fn.isDeclaration())
        continue;
      Definitions.push_back(&Fn);
    }

    // Modules without any definitions need no flags at all.
    if (Definitions.empty())
      return;

    auto &Ctx = M.getContext();
    IRBuilder<> Builder(Ctx);

    // Flags that fire must have owners registered. We will do it below and
    // that's fine, because they can only be reached once the code is emitted.
    const unsigned FirstFlagIdx = reserveDiscoveryFlags(Definitions.size());

    std::vector<GlobalValue::GUID> NewDiscoveryRoots;
    unsigned FlagIdx = FirstFlagIdx;
    for (Function *Fn : Definitions) {
      // TODO: Emitting the write operation into an indirection stub would
      // allow to skip it once we got the notification.
      BasicBlock *OldEntry = &Fn->getEntryBlock();
      BasicBlock *Prolog = BasicBlock::Create(
          Ctx, "NotifyFunctionReachedProlog", Fn, OldEntry);
      Builder.SetInsertPoint(Prolog);
      compileFunctionReachedFlagSetter(Builder, FlagsIncoming + FlagIdx);
      Builder.CreateBr(OldEntry);
      ++FlagIdx;

      const std::string GlobalName = GlobalValue::getGlobalIdentifier(
          Fn->getName(), Fn->getLinkage(), M.getSourceFileName());
      NewDiscoveryRoots.push_back(GlobalValue::getGUID(GlobalName));
    }

    LLVM_DEBUG(dbgs() << "Instrumented " << NewDiscoveryRoots.size()
                      << " new functions in module " << M.getName() << "\n");

    // Submit owner info, so the DiscoveryThread can evaluate the flags.
    registerDiscoveryFlagOwners(std::move(NewDiscoveryRoots), FirstFlagIdx);
  });

  BaseLayer.emit(std::move(R), std::move(TSM));
}
/// Emit IR at the builder's insert point that marks flag \p F as Fired.
///
/// The flag's host address is baked into the code as an integer constant. If
/// fencing of JITed code is enabled, an additional atomic Release store to
/// FlagsSync is emitted right behind the flag write.
///
/// \param B Builder positioned where the store(s) should be inserted.
/// \param F The flag to set; must still be Clear.
void ThinLtoInstrumentationLayer::compileFunctionReachedFlagSetter(
    IRBuilder<> &B, Flag *F) {
  assert(*F == Clear);
  Type *Int64Ty = Type::getInt64Ty(B.getContext());

  // Write one immediate 8bit value to a fixed location in memory.
  auto FlagAddr = pointerToJITTargetAddress(F);
  Type *FlagTy = Type::getInt8Ty(B.getContext());
  B.CreateStore(ConstantInt::get(FlagTy, Fired),
                B.CreateIntToPtr(ConstantInt::get(Int64Ty, FlagAddr),
                                 FlagTy->getPointerTo()));

  if (MemFence & ThinLtoJIT::FenceJITedCode) {
    // Overwrite the sync value with Release ordering. The discovery thread
    // reads it with Acquire ordering. The actual value doesn't matter.
    static constexpr bool IsVolatile = true;
    static constexpr Instruction *NoInsertBefore = nullptr;
    // llvm::Align is measured in bytes, and overstating a store's alignment is
    // undefined behaviour in LLVM IR. Take the alignment from the object's
    // type instead of hard-coding a value it may not actually satisfy.
    const Align SyncFlagAlign(alignof(decltype(FlagsSync)));
    auto SyncFlagAddr = pointerToJITTargetAddress(&FlagsSync);

    B.Insert(
        new StoreInst(ConstantInt::get(Int64Ty, 0),
                      B.CreateIntToPtr(ConstantInt::get(Int64Ty, SyncFlagAddr),
                                       Int64Ty->getPointerTo()),
                      IsVolatile, SyncFlagAlign, AtomicOrdering::Release,
                      SyncScope::System, NoInsertBefore));
  }
}
/// Print statistics about the discovery flags to \p OS.
///
/// Reports the number of allocated flags, the number issued so far, how many
/// 'Incoming' flags are set to Fired, and how many flag owners are still
/// registered.
///
/// \param OS Stream to write the report to.
void ThinLtoInstrumentationLayer::dump(raw_ostream &OS) {
  OS << "Discovery flags stats\n";

  unsigned NumFlagsFired = 0;
  for (unsigned i = 0; i < NumFlagsAllocated; i++) {
    if (FlagsIncoming[i] == Fired)
      ++NumFlagsFired;
  }
  // Use "%6u": the values are unsigned, and the former "%6.d" carried a
  // precision of zero, which prints a value of 0 as no digits at all.
  OS << "Alloc: " << format("%6u", NumFlagsAllocated) << "\n";
  OS << "Issued: " << format("%6u", NumFlagsUsed.load()) << "\n";
  OS << "Fired: " << format("%6u", NumFlagsFired) << "\n";

  // The owner map is guarded by DiscoveryFlagsInfoLock everywhere else in this
  // file; take it here as well, but only for the duration of the read.
  unsigned RemainingFlagOwners = 0;
  {
    std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock);
    RemainingFlagOwners = FlagOwnersMap.size();
  }
  OS << "\nFlagOwnersMap has " << RemainingFlagOwners
     << " remaining entries.\n";
}
/// Release the memory backing the discovery flags.
ThinLtoInstrumentationLayer::~ThinLtoInstrumentationLayer() {
// FlagsStorage is the original, unaligned pointer obtained from std::calloc
// in allocateDiscoveryFlags(); FlagsIncoming and FlagsHandled point into the
// same allocation and must not be freed separately.
std::free(FlagsStorage);
}
} // namespace orc
} // namespace llvm