forked from OSchip/llvm-project
443 lines
17 KiB
C++
443 lines
17 KiB
C++
//===---- ManagedMemoryRewrite.cpp - Rewrite global & malloc'd memory -----===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Take a module and rewrite:
|
|
// 1. `malloc` -> `polly_mallocManaged`
|
|
// 2. `free` -> `polly_freeManaged`
|
|
// 3. global arrays with initializers -> global arrays that are initialized
|
|
// with a constructor call to
|
|
// `polly_mallocManaged`.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "polly/CodeGen/CodeGeneration.h"
|
|
#include "polly/CodeGen/IslAst.h"
|
|
#include "polly/CodeGen/IslNodeBuilder.h"
|
|
#include "polly/CodeGen/PPCGCodeGeneration.h"
|
|
#include "polly/CodeGen/Utils.h"
|
|
#include "polly/DependenceInfo.h"
|
|
#include "polly/LinkAllPasses.h"
|
|
#include "polly/Options.h"
|
|
#include "polly/ScopDetection.h"
|
|
#include "polly/ScopInfo.h"
|
|
#include "polly/Support/SCEVValidator.h"
|
|
#include "llvm/Analysis/AliasAnalysis.h"
|
|
#include "llvm/Analysis/BasicAliasAnalysis.h"
|
|
#include "llvm/Analysis/CaptureTracking.h"
|
|
#include "llvm/Analysis/GlobalsModRef.h"
|
|
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/IR/Verifier.h"
|
|
#include "llvm/IRReader/IRReader.h"
|
|
#include "llvm/Linker/Linker.h"
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
|
|
|
static cl::opt<bool> RewriteAllocas(
|
|
"polly-acc-rewrite-allocas",
|
|
cl::desc(
|
|
"Ask the managed memory rewriter to also rewrite alloca instructions"),
|
|
cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
|
|
|
|
static cl::opt<bool> IgnoreLinkageForGlobals(
|
|
"polly-acc-rewrite-ignore-linkage-for-globals",
|
|
cl::desc(
|
|
"By default, we only rewrite globals with internal linkage. This flag "
|
|
"enables rewriting of globals regardless of linkage"),
|
|
cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
|
|
|
|
#define DEBUG_TYPE "polly-acc-rewrite-managed-memory"
|
|
namespace {
|
|
|
|
static llvm::Function *getOrCreatePollyMallocManaged(Module &M) {
|
|
const char *Name = "polly_mallocManaged";
|
|
Function *F = M.getFunction(Name);
|
|
|
|
// If F is not available, declare it.
|
|
if (!F) {
|
|
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
PollyIRBuilder Builder(M.getContext());
|
|
// TODO: How do I get `size_t`? I assume from DataLayout?
|
|
FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(),
|
|
{Builder.getInt64Ty()}, false);
|
|
F = Function::Create(Ty, Linkage, Name, &M);
|
|
}
|
|
|
|
return F;
|
|
}
|
|
|
|
static llvm::Function *getOrCreatePollyFreeManaged(Module &M) {
|
|
const char *Name = "polly_freeManaged";
|
|
Function *F = M.getFunction(Name);
|
|
|
|
// If F is not available, declare it.
|
|
if (!F) {
|
|
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
PollyIRBuilder Builder(M.getContext());
|
|
// TODO: How do I get `size_t`? I assume from DataLayout?
|
|
FunctionType *Ty =
|
|
FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false);
|
|
F = Function::Create(Ty, Linkage, Name, &M);
|
|
}
|
|
|
|
return F;
|
|
}
|
|
|
|
// Expand a constant expression `Cur`, which is used at instruction `Parent`
|
|
// at index `index`.
|
|
// Since a constant expression can expand to multiple instructions, store all
|
|
// the expands into a set called `Expands`.
|
|
// Note that this goes inorder on the constant expression tree.
|
|
// A * ((B * D) + C)
|
|
// will be processed with first A, then B * D, then B, then D, and then C.
|
|
// Though ConstantExprs are not treated as "trees" but as DAGs, since you can
|
|
// have something like this:
|
|
// *
|
|
// / \
|
|
// \ /
|
|
// (D)
|
|
//
|
|
// For the purposes of this expansion, we expand the two occurences of D
|
|
// separately. Therefore, we expand the DAG into the tree:
|
|
// *
|
|
// / \
|
|
// D D
|
|
// TODO: We don't _have_to do this, but this is the simplest solution.
|
|
// We can write a solution that keeps track of which constants have been
|
|
// already expanded.
|
|
static void expandConstantExpr(ConstantExpr *Cur, PollyIRBuilder &Builder,
|
|
Instruction *Parent, int index,
|
|
SmallPtrSet<Instruction *, 4> &Expands) {
|
|
assert(Cur && "invalid constant expression passed");
|
|
Instruction *I = Cur->getAsInstruction();
|
|
assert(I && "unable to convert ConstantExpr to Instruction");
|
|
|
|
LLVM_DEBUG(dbgs() << "Expanding ConstantExpression: (" << *Cur
|
|
<< ") in Instruction: (" << *I << ")\n";);
|
|
|
|
// Invalidate `Cur` so that no one after this point uses `Cur`. Rather,
|
|
// they should mutate `I`.
|
|
Cur = nullptr;
|
|
|
|
Expands.insert(I);
|
|
Parent->setOperand(index, I);
|
|
|
|
// The things that `Parent` uses (its operands) should be created
|
|
// before `Parent`.
|
|
Builder.SetInsertPoint(Parent);
|
|
Builder.Insert(I);
|
|
|
|
for (unsigned i = 0; i < I->getNumOperands(); i++) {
|
|
Value *Op = I->getOperand(i);
|
|
assert(isa<Constant>(Op) && "constant must have a constant operand");
|
|
|
|
if (ConstantExpr *CExprOp = dyn_cast<ConstantExpr>(Op))
|
|
expandConstantExpr(CExprOp, Builder, I, i, Expands);
|
|
}
|
|
}
|
|
|
|
// Edit all uses of `OldVal` to NewVal` in `Inst`. This will rewrite
|
|
// `ConstantExpr`s that are used in the `Inst`.
|
|
// Note that `replaceAllUsesWith` is insufficient for this purpose because it
|
|
// does not rewrite values in `ConstantExpr`s.
|
|
static void rewriteOldValToNew(Instruction *Inst, Value *OldVal, Value *NewVal,
|
|
PollyIRBuilder &Builder) {
|
|
|
|
// This contains a set of instructions in which OldVal must be replaced.
|
|
// We start with `Inst`, and we fill it up with the expanded `ConstantExpr`s
|
|
// from `Inst`s arguments.
|
|
// We need to go through this process because `replaceAllUsesWith` does not
|
|
// actually edit `ConstantExpr`s.
|
|
SmallPtrSet<Instruction *, 4> InstsToVisit = {Inst};
|
|
|
|
// Expand all `ConstantExpr`s and place it in `InstsToVisit`.
|
|
for (unsigned i = 0; i < Inst->getNumOperands(); i++) {
|
|
Value *Operand = Inst->getOperand(i);
|
|
if (ConstantExpr *ValueConstExpr = dyn_cast<ConstantExpr>(Operand))
|
|
expandConstantExpr(ValueConstExpr, Builder, Inst, i, InstsToVisit);
|
|
}
|
|
|
|
// Now visit each instruction and use `replaceUsesOfWith`. We know that
|
|
// will work because `I` cannot have any `ConstantExpr` within it.
|
|
for (Instruction *I : InstsToVisit)
|
|
I->replaceUsesOfWith(OldVal, NewVal);
|
|
}
|
|
|
|
// Given a value `Current`, return all Instructions that may contain `Current`
|
|
// in an expression.
|
|
// We need this auxiliary function, because if we have a
|
|
// `Constant` that is a user of `V`, we need to recurse into the
|
|
// `Constant`s uses to gather the root instruciton.
|
|
static void getInstructionUsersOfValue(Value *V,
|
|
SmallVector<Instruction *, 4> &Owners) {
|
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
|
Owners.push_back(I);
|
|
} else {
|
|
// Anything that is a `User` must be a constant or an instruction.
|
|
auto *C = cast<Constant>(V);
|
|
for (Use &CUse : C->uses())
|
|
getInstructionUsersOfValue(CUse.getUser(), Owners);
|
|
}
|
|
}
|
|
|
|
static void
|
|
replaceGlobalArray(Module &M, const DataLayout &DL, GlobalVariable &Array,
|
|
SmallPtrSet<GlobalVariable *, 4> &ReplacedGlobals) {
|
|
// We only want arrays.
|
|
ArrayType *ArrayTy = dyn_cast<ArrayType>(Array.getType()->getElementType());
|
|
if (!ArrayTy)
|
|
return;
|
|
Type *ElemTy = ArrayTy->getElementType();
|
|
PointerType *ElemPtrTy = ElemTy->getPointerTo();
|
|
|
|
// We only wish to replace arrays that are visible in the module they
|
|
// inhabit. Otherwise, our type edit from [T] to T* would be illegal across
|
|
// modules.
|
|
const bool OnlyVisibleInsideModule = Array.hasPrivateLinkage() ||
|
|
Array.hasInternalLinkage() ||
|
|
IgnoreLinkageForGlobals;
|
|
if (!OnlyVisibleInsideModule) {
|
|
LLVM_DEBUG(
|
|
dbgs() << "Not rewriting (" << Array
|
|
<< ") to managed memory "
|
|
"because it could be visible externally. To force rewrite, "
|
|
"use -polly-acc-rewrite-ignore-linkage-for-globals.\n");
|
|
return;
|
|
}
|
|
|
|
if (!Array.hasInitializer() ||
|
|
!isa<ConstantAggregateZero>(Array.getInitializer())) {
|
|
LLVM_DEBUG(dbgs() << "Not rewriting (" << Array
|
|
<< ") to managed memory "
|
|
"because it has an initializer which is "
|
|
"not a zeroinitializer.\n");
|
|
return;
|
|
}
|
|
|
|
// At this point, we have committed to replacing this array.
|
|
ReplacedGlobals.insert(&Array);
|
|
|
|
std::string NewName = Array.getName();
|
|
NewName += ".toptr";
|
|
GlobalVariable *ReplacementToArr =
|
|
cast<GlobalVariable>(M.getOrInsertGlobal(NewName, ElemPtrTy));
|
|
ReplacementToArr->setInitializer(ConstantPointerNull::get(ElemPtrTy));
|
|
|
|
Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M);
|
|
std::string FnName = Array.getName();
|
|
FnName += ".constructor";
|
|
PollyIRBuilder Builder(M.getContext());
|
|
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
|
|
const GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
Function *F = Function::Create(Ty, Linkage, FnName, &M);
|
|
BasicBlock *Start = BasicBlock::Create(M.getContext(), "entry", F);
|
|
Builder.SetInsertPoint(Start);
|
|
|
|
const uint64_t ArraySizeInt = DL.getTypeAllocSize(ArrayTy);
|
|
Value *ArraySize = Builder.getInt64(ArraySizeInt);
|
|
ArraySize->setName("array.size");
|
|
|
|
Value *AllocatedMemRaw =
|
|
Builder.CreateCall(PollyMallocManaged, {ArraySize}, "mem.raw");
|
|
Value *AllocatedMemTyped =
|
|
Builder.CreatePointerCast(AllocatedMemRaw, ElemPtrTy, "mem.typed");
|
|
Builder.CreateStore(AllocatedMemTyped, ReplacementToArr);
|
|
Builder.CreateRetVoid();
|
|
|
|
const int Priority = 0;
|
|
appendToGlobalCtors(M, F, Priority, ReplacementToArr);
|
|
|
|
SmallVector<Instruction *, 4> ArrayUserInstructions;
|
|
// Get all instructions that use array. We need to do this weird thing
|
|
// because `Constant`s that contain this array neeed to be expanded into
|
|
// instructions so that we can replace their parameters. `Constant`s cannot
|
|
// be edited easily, so we choose to convert all `Constant`s to
|
|
// `Instruction`s and handle all of the uses of `Array` uniformly.
|
|
for (Use &ArrayUse : Array.uses())
|
|
getInstructionUsersOfValue(ArrayUse.getUser(), ArrayUserInstructions);
|
|
|
|
for (Instruction *UserOfArrayInst : ArrayUserInstructions) {
|
|
|
|
Builder.SetInsertPoint(UserOfArrayInst);
|
|
// <ty>** -> <ty>*
|
|
Value *ArrPtrLoaded = Builder.CreateLoad(ReplacementToArr, "arrptr.load");
|
|
// <ty>* -> [ty]*
|
|
Value *ArrPtrLoadedBitcasted = Builder.CreateBitCast(
|
|
ArrPtrLoaded, ArrayTy->getPointerTo(), "arrptr.bitcast");
|
|
rewriteOldValToNew(UserOfArrayInst, &Array, ArrPtrLoadedBitcasted, Builder);
|
|
}
|
|
}
|
|
|
|
// We return all `allocas` that may need to be converted to a call to
|
|
// cudaMallocManaged.
|
|
static void getAllocasToBeManaged(Function &F,
|
|
SmallSet<AllocaInst *, 4> &Allocas) {
|
|
for (BasicBlock &BB : F) {
|
|
for (Instruction &I : BB) {
|
|
auto *Alloca = dyn_cast<AllocaInst>(&I);
|
|
if (!Alloca)
|
|
continue;
|
|
LLVM_DEBUG(dbgs() << "Checking if (" << *Alloca << ") may be captured: ");
|
|
|
|
if (PointerMayBeCaptured(Alloca, /* ReturnCaptures */ false,
|
|
/* StoreCaptures */ true)) {
|
|
Allocas.insert(Alloca);
|
|
LLVM_DEBUG(dbgs() << "YES (captured).\n");
|
|
} else {
|
|
LLVM_DEBUG(dbgs() << "NO (not captured).\n");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void rewriteAllocaAsManagedMemory(AllocaInst *Alloca,
|
|
const DataLayout &DL) {
|
|
LLVM_DEBUG(dbgs() << "rewriting: (" << *Alloca << ") to managed mem.\n");
|
|
Module *M = Alloca->getModule();
|
|
assert(M && "Alloca does not have a module");
|
|
|
|
PollyIRBuilder Builder(M->getContext());
|
|
Builder.SetInsertPoint(Alloca);
|
|
|
|
Value *MallocManagedFn = getOrCreatePollyMallocManaged(*Alloca->getModule());
|
|
const uint64_t Size =
|
|
DL.getTypeAllocSize(Alloca->getType()->getElementType());
|
|
Value *SizeVal = Builder.getInt64(Size);
|
|
Value *RawManagedMem = Builder.CreateCall(MallocManagedFn, {SizeVal});
|
|
Value *Bitcasted = Builder.CreateBitCast(RawManagedMem, Alloca->getType());
|
|
|
|
Function *F = Alloca->getFunction();
|
|
assert(F && "Alloca has invalid function");
|
|
|
|
Bitcasted->takeName(Alloca);
|
|
Alloca->replaceAllUsesWith(Bitcasted);
|
|
Alloca->eraseFromParent();
|
|
|
|
for (BasicBlock &BB : *F) {
|
|
ReturnInst *Return = dyn_cast<ReturnInst>(BB.getTerminator());
|
|
if (!Return)
|
|
continue;
|
|
Builder.SetInsertPoint(Return);
|
|
|
|
Value *FreeManagedFn = getOrCreatePollyFreeManaged(*M);
|
|
Builder.CreateCall(FreeManagedFn, {RawManagedMem});
|
|
}
|
|
}
|
|
|
|
// Replace all uses of `Old` with `New`, even inside `ConstantExpr`.
|
|
//
|
|
// `replaceAllUsesWith` does replace values in `ConstantExpr`. This function
|
|
// actually does replace it in `ConstantExpr`. The caveat is that if there is
|
|
// a use that is *outside* a function (say, at global declarations), we fail.
|
|
// So, this is meant to be used on values which we know will only be used
|
|
// within functions.
|
|
//
|
|
// This process works by looking through the uses of `Old`. If it finds a
|
|
// `ConstantExpr`, it recursively looks for the owning instruction.
|
|
// Then, it expands all the `ConstantExpr` to instructions and replaces
|
|
// `Old` with `New` in the expanded instructions.
|
|
static void replaceAllUsesAndConstantUses(Value *Old, Value *New,
|
|
PollyIRBuilder &Builder) {
|
|
SmallVector<Instruction *, 4> UserInstructions;
|
|
// Get all instructions that use array. We need to do this weird thing
|
|
// because `Constant`s that contain this array neeed to be expanded into
|
|
// instructions so that we can replace their parameters. `Constant`s cannot
|
|
// be edited easily, so we choose to convert all `Constant`s to
|
|
// `Instruction`s and handle all of the uses of `Array` uniformly.
|
|
for (Use &ArrayUse : Old->uses())
|
|
getInstructionUsersOfValue(ArrayUse.getUser(), UserInstructions);
|
|
|
|
for (Instruction *I : UserInstructions)
|
|
rewriteOldValToNew(I, Old, New, Builder);
|
|
}
|
|
|
|
class ManagedMemoryRewritePass : public ModulePass {
|
|
public:
|
|
static char ID;
|
|
GPUArch Architecture;
|
|
GPURuntime Runtime;
|
|
|
|
ManagedMemoryRewritePass() : ModulePass(ID) {}
|
|
virtual bool runOnModule(Module &M) {
|
|
const DataLayout &DL = M.getDataLayout();
|
|
|
|
Function *Malloc = M.getFunction("malloc");
|
|
|
|
if (Malloc) {
|
|
PollyIRBuilder Builder(M.getContext());
|
|
Function *PollyMallocManaged = getOrCreatePollyMallocManaged(M);
|
|
assert(PollyMallocManaged && "unable to create polly_mallocManaged");
|
|
|
|
replaceAllUsesAndConstantUses(Malloc, PollyMallocManaged, Builder);
|
|
Malloc->eraseFromParent();
|
|
}
|
|
|
|
Function *Free = M.getFunction("free");
|
|
|
|
if (Free) {
|
|
PollyIRBuilder Builder(M.getContext());
|
|
Function *PollyFreeManaged = getOrCreatePollyFreeManaged(M);
|
|
assert(PollyFreeManaged && "unable to create polly_freeManaged");
|
|
|
|
replaceAllUsesAndConstantUses(Free, PollyFreeManaged, Builder);
|
|
Free->eraseFromParent();
|
|
}
|
|
|
|
SmallPtrSet<GlobalVariable *, 4> GlobalsToErase;
|
|
for (GlobalVariable &Global : M.globals())
|
|
replaceGlobalArray(M, DL, Global, GlobalsToErase);
|
|
for (GlobalVariable *G : GlobalsToErase)
|
|
G->eraseFromParent();
|
|
|
|
// Rewrite allocas to cudaMallocs if we are asked to do so.
|
|
if (RewriteAllocas) {
|
|
SmallSet<AllocaInst *, 4> AllocasToBeManaged;
|
|
for (Function &F : M.functions())
|
|
getAllocasToBeManaged(F, AllocasToBeManaged);
|
|
|
|
for (AllocaInst *Alloca : AllocasToBeManaged)
|
|
rewriteAllocaAsManagedMemory(Alloca, DL);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
};
|
|
} // namespace
|
|
char ManagedMemoryRewritePass::ID = 42;
|
|
|
|
Pass *polly::createManagedMemoryRewritePassPass(GPUArch Arch,
|
|
GPURuntime Runtime) {
|
|
ManagedMemoryRewritePass *pass = new ManagedMemoryRewritePass();
|
|
pass->Runtime = Runtime;
|
|
pass->Architecture = Arch;
|
|
return pass;
|
|
}
|
|
|
|
INITIALIZE_PASS_BEGIN(
|
|
ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory",
|
|
"Polly - Rewrite all allocations in heap & data section to managed memory",
|
|
false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(PPCGCodeGeneration);
|
|
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
|
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
|
|
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
|
|
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
|
|
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
|
|
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
|
|
INITIALIZE_PASS_END(
|
|
ManagedMemoryRewritePass, "polly-acc-rewrite-managed-memory",
|
|
"Polly - Rewrite all allocations in heap & data section to managed memory",
|
|
false, false)
|