forked from OSchip/llvm-project
[OpenMP][IR-Builder] Introduce "pragma omp parallel" code generation
This patch combines the `emitParallel` logic prototyped in D61953 with the OpenMPIRBuilder (D69785) and introduces `CreateParallel`. Reviewed By: fghanim Differential Revision: https://reviews.llvm.org/D70109
This commit is contained in:
parent
2e6c15d1e7
commit
e4add9727b
|
@ -49,6 +49,16 @@ enum class RuntimeFunction {
|
|||
#define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum;
|
||||
#include "llvm/Frontend/OpenMP/OMPKinds.def"
|
||||
|
||||
/// IDs for the different proc bind kinds.
|
||||
enum class ProcBindKind {
|
||||
#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
|
||||
#include "llvm/Frontend/OpenMP/OMPKinds.def"
|
||||
};
|
||||
|
||||
#define OMP_PROC_BIND_KIND(Enum, ...) \
|
||||
constexpr auto Enum = omp::ProcBindKind::Enum;
|
||||
#include "llvm/Frontend/OpenMP/OMPKinds.def"
|
||||
|
||||
/// IDs for all omp runtime library ident_t flag encodings (see
|
||||
/// their definition in openmp/runtime/src/kmp.h).
|
||||
enum class IdentFlag {
|
||||
|
@ -67,8 +77,8 @@ Directive getOpenMPDirectiveKind(StringRef Str);
|
|||
StringRef getOpenMPDirectiveName(Directive D);
|
||||
|
||||
/// Forward declarations for LLVM-IR types (simple, function and structure) are
|
||||
/// generated below. Their names are defined and used in OpenMPKinds.def. Here
|
||||
/// we provide the forward declarations, the initializeTypes function will
|
||||
/// generated below. Their names are defined and used in OpenMP/OMPKinds.def.
|
||||
/// Here we provide the forward declarations, the initializeTypes function will
|
||||
/// provide the values.
|
||||
///
|
||||
///{
|
||||
|
@ -83,10 +93,10 @@ namespace types {
|
|||
extern PointerType *VarName##Ptr;
|
||||
#include "llvm/Frontend/OpenMP/OMPKinds.def"
|
||||
|
||||
/// Helper to initialize all types defined in OpenMPKinds.def.
|
||||
/// Helper to initialize all types defined in OpenMP/OMPKinds.def.
|
||||
void initializeTypes(Module &M);
|
||||
|
||||
/// Helper to uninitialize all types defined in OpenMPKinds.def.
|
||||
/// Helper to uninitialize all types defined in OpenMP/OMPKinds.def.
|
||||
void uninitializeTypes();
|
||||
|
||||
} // namespace types
|
||||
|
|
|
@ -75,6 +75,40 @@ public:
|
|||
/// NOTE: Temporary solution until Clang CG is gone.
|
||||
void popFinalizationCB() { FinalizationStack.pop_back(); }
|
||||
|
||||
/// Callback type for body (=inner region) code generation
|
||||
///
|
||||
/// The callback takes code locations as arguments, each describing a
|
||||
/// location at which code might need to be generated or a location that is
|
||||
/// the target of control transfer.
|
||||
///
|
||||
/// \param AllocaIP is the insertion point at which new alloca instructions
|
||||
/// should be placed.
|
||||
/// \param CodeGenIP is the insertion point at which the body code should be
|
||||
/// placed.
|
||||
/// \param ContinuationBB is the basic block target to leave the body.
|
||||
///
|
||||
/// Note that all blocks pointed to by the arguments have terminators.
|
||||
using BodyGenCallbackTy = function_ref<void(
|
||||
InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
|
||||
BasicBlock & /* ContinuationBB */)>;
|
||||
|
||||
/// Callback type for variable privatization (think copy & default
|
||||
/// constructor).
|
||||
///
|
||||
/// \param AllocaIP is the insertion point at which new alloca instructions
|
||||
/// should be placed.
|
||||
/// \param CodeGenIP is the insertion point at which the privatization code
|
||||
/// should be placed.
|
||||
/// \param Val The value being copied/created.
|
||||
/// \param ReplVal The replacement value, thus a copy or new created version
|
||||
/// of \p Val.
|
||||
///
|
||||
/// \returns The new insertion point where code generation continues and
|
||||
/// \p ReplVal the replacement of \p Val.
|
||||
using PrivatizeCallbackTy = function_ref<InsertPointTy(
|
||||
InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
|
||||
Value & /* Val */, Value *& /* ReplVal */)>;
|
||||
|
||||
/// Description of a LLVM-IR insertion point (IP) and a debug/source location
|
||||
/// (filename, line, column, ...).
|
||||
struct LocationDescription {
|
||||
|
@ -105,6 +139,24 @@ public:
|
|||
bool ForceSimpleCall = false,
|
||||
bool CheckCancelFlag = true);
|
||||
|
||||
/// Generator for '#omp parallel'
|
||||
///
|
||||
/// \param Loc The insert and source location description.
|
||||
/// \param BodyGenCB Callback that will generate the region code.
|
||||
/// \param PrivCB Callback to copy a given variable (think copy constructor).
|
||||
/// \param FiniCB Callback to finalize variable copies.
|
||||
/// \param IfCondition The evaluated 'if' clause expression, if any.
|
||||
/// \param NumThreads The evaluated 'num_threads' clause expression, if any.
|
||||
/// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
|
||||
/// \param IsCancellable Flag to indicate a cancellable parallel region.
|
||||
///
|
||||
/// \returns The insertion position *after* the parallel.
|
||||
IRBuilder<>::InsertPoint
|
||||
CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
|
||||
PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
|
||||
Value *IfCondition, Value *NumThreads,
|
||||
omp::ProcBindKind ProcBind, bool IsCancellable);
|
||||
|
||||
///}
|
||||
|
||||
private:
|
||||
|
|
|
@ -167,6 +167,11 @@ __OMP_RTL(__kmpc_barrier, false, Void, IdentPtr, Int32)
|
|||
__OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32)
|
||||
__OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr)
|
||||
__OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr)
|
||||
__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */Int32)
|
||||
__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */Int32)
|
||||
__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
|
||||
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
|
||||
|
||||
__OMP_RTL(omp_get_thread_num, false, Int32, )
|
||||
|
||||
#undef __OMP_RTL
|
||||
|
@ -234,3 +239,26 @@ __OMP_IDENT_FLAG(BARRIER_IMPL_WORKSHARE, 0x01C0)
|
|||
#undef OMP_IDENT_FLAG
|
||||
|
||||
///}
|
||||
|
||||
|
||||
/// Proc bind kinds
|
||||
///
|
||||
///{
|
||||
|
||||
#ifndef OMP_PROC_BIND_KIND
|
||||
#define OMP_PROC_BIND_KIND(Enum, Str, Value)
|
||||
#endif
|
||||
|
||||
#define __OMP_PROC_BIND_KIND(Name, Value) \
|
||||
OMP_PROC_BIND_KIND(OMP_PB_##Name, #Name, Value)
|
||||
|
||||
__OMP_PROC_BIND_KIND(master, 2)
|
||||
__OMP_PROC_BIND_KIND(close, 3)
|
||||
__OMP_PROC_BIND_KIND(spread, 4)
|
||||
__OMP_PROC_BIND_KIND(default, 6)
|
||||
__OMP_PROC_BIND_KIND(unknown, 7)
|
||||
|
||||
#undef __OMP_PROC_BIND_KIND
|
||||
#undef OMP_PROC_BIND_KIND
|
||||
|
||||
///}
|
||||
|
|
|
@ -16,10 +16,13 @@
|
|||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
#include "llvm/IR/DebugInfo.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/CodeExtractor.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
|
@ -216,8 +219,17 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
|
|||
if (UseCancelBarrier && CheckCancelFlag) {
|
||||
// For a cancel barrier we create two new blocks.
|
||||
BasicBlock *BB = Builder.GetInsertBlock();
|
||||
BasicBlock *NonCancellationBlock = BasicBlock::Create(
|
||||
BB->getContext(), BB->getName() + ".cont", BB->getParent());
|
||||
BasicBlock *NonCancellationBlock;
|
||||
if (Builder.GetInsertPoint() == BB->end()) {
|
||||
// TODO: This branch will not be needed once we moved to the
|
||||
// OpenMPIRBuilder codegen completely.
|
||||
NonCancellationBlock = BasicBlock::Create(
|
||||
BB->getContext(), BB->getName() + ".cont", BB->getParent());
|
||||
} else {
|
||||
NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
Builder.SetInsertPoint(BB);
|
||||
}
|
||||
BasicBlock *CancellationBlock = BasicBlock::Create(
|
||||
BB->getContext(), BB->getName() + ".cncl", BB->getParent());
|
||||
|
||||
|
@ -233,8 +245,310 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
|
|||
FI.FiniCB(Builder.saveIP());
|
||||
|
||||
// The continuation block is where code generation continues.
|
||||
Builder.SetInsertPoint(NonCancellationBlock);
|
||||
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
|
||||
}
|
||||
|
||||
return Builder.saveIP();
|
||||
}
|
||||
|
||||
/// Emit the IR for an OpenMP parallel region at \p Loc.
///
/// The region is first modeled inline: \p BodyGenCB generates the body between
/// dedicated entry/exit blocks, values flowing into or out of the region are
/// handed to \p PrivCB for privatization, and the collected blocks are then
/// outlined with the CodeExtractor. The outlined function is invoked through
/// __kmpc_fork_call. If \p IfCondition is given, the "else" branch receives a
/// serialized version that calls the outlined function directly, bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
///
/// \param Loc           The insert and source location description.
/// \param BodyGenCB     Callback that generates the region code.
/// \param PrivCB        Callback that provides a replacement for a captured
///                      value.
/// \param FiniCB        Callback invoked at finalization points.
/// \param IfCondition   The evaluated 'if' clause expression, or nullptr.
/// \param NumThreads    The evaluated 'num_threads' expression, or nullptr.
/// \param ProcBind      The 'proc_bind' kind; OMP_PB_default emits no call.
/// \param IsCancellable Recorded on the finalization stack for this region.
///
/// \returns The insertion point after the parallel region, or Loc.IP if
///          updateToLocation() rejects \p Loc.
IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
    Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadID = getOrCreateThreadID(Ident);

  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PB_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
                       Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
  // program, otherwise we only need them for modeling purposes to get the
  // associated arguments in the outlined function. In the former case,
  // initialize the allocas properly, in the latter case, delete them later.
  if (IfCondition) {
    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  } else {
    ToBeDeleted.push_back(TIDAddr);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);

  BasicBlock *ThenBB = ThenTI->getParent();
  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  BasicBlock *PRegBodyBB =
      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB =
      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  InsertPointTy AllocaIP(PRegEntryBB,
                         PRegEntryBB->getTerminator()->getIterator());
  Builder.restoreIP(AllocaIP);
  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
  ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));

  // ThenBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);

  LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n");

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
  ParallelRegionBlockSet.insert(PRegEntryBB);
  ParallelRegionBlockSet.insert(PRegExitBB);

  // Collect all blocks in-between PRegEntryBB and PRegExitBB.
  Worklist.push_back(PRegEntryBB);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    ParallelRegionBlocks.push_back(BB);
    for (BasicBlock *SuccBB : successors(BB))
      if (ParallelRegionBlockSet.insert(SuccBB).second)
        Worklist.push_back(SuccBB);
  }

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);

  // Replace the uses of a captured value inside the region with the value
  // provided by the privatization callback (or the local thread id).
  auto PrivHelper = [&](Value &V) {
    // The modeling allocas become the tid arguments of the outlined function,
    // they are not privatized.
    if (&V == TIDAddr || &V == ZeroAddr)
      return;

    // Collect the uses first, the callback below may add new ones.
    SmallVector<Use *, 8> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.push_back(&U);

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  for (Value *Output : Outputs) {
    LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
    PrivHelper(*Output);
  }

  LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n");
  LLVM_DEBUG({
    for (auto *BB : ParallelRegionBlocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
  // Check the result before it is dereferenced by the debug output and the
  // entry block fix-up below.
  assert(OutlinedFn && "Expected the parallel region to be outlined!");

  LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
  LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");

  // Remove the artificial entry introduced by the extractor right away, we
  // made our own entry block after all.
  {
    BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
    assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
    assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
    PRegEntryBB->moveBefore(&ArtificialEntry);
    ArtificialEntry.eraseFromParent();
  }
  LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
  assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);

  assert(OutlinedFn->getNumUses() == 1 &&
         "Expected the extractor call to be the only use!");
  assert(OutlinedFn->arg_size() >= 2 &&
         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;

  CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
  CI->getParent()->setName("omp_parallel");
  Builder.SetInsertPoint(CI);

  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
                           Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};

  SmallVector<Value *, 16> RealArgs;
  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

  FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
  Builder.CreateCall(RTLFn, RealArgs);

  LLVM_DEBUG(dbgs() << "With fork_call placed: "
                    << *Builder.GetInsertBlock()->getParent() << "\n");

  // Remember where code generation continues before the artificial
  // terminator is removed.
  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();

  // Initialize the local TID stack location with the argument value.
  Builder.SetInsertPoint(PrivTID);
  Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);

  // If no "if" clause was present we do not need the call created during
  // outlining, otherwise we reuse it in the serialized parallel region.
  if (!ElseTI) {
    CI->eraseFromParent();
  } else {

    // If an "if" clause was present we are now generating the serialized
    // version into the "else" branch.
    Builder.SetInsertPoint(ElseTI);

    // Build calls __kmpc_serialized_parallel(&Ident, GTid);
    Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
    Builder.CreateCall(
        getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
        SerializedParallelCallArgs);

    // OutlinedFn(&gtid, &zero, CapturedStruct);
    CI->removeFromParent();
    Builder.Insert(CI);

    // __kmpc_end_serialized_parallel(&Ident, GTid);
    Value *EndArgs[] = {Ident, ThreadID};
    Builder.CreateCall(
        getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
        EndArgs);

    LLVM_DEBUG(dbgs() << "With serialized parallel region: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");
  }

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini block
  // and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
  assert(PreFiniTI->getNumSuccessors() == 1 &&
         PreFiniTI->getSuccessor(0)->size() == 1 &&
         isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
         "Unexpected CFG structure!");

  InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  // Drop the instructions that were only needed to model the region.
  for (Instruction *I : ToBeDeleted)
    I->eraseFromParent();

  return AfterIP;
}
|
||||
|
|
|
@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
|
|||
FrontendOpenMP
|
||||
Support
|
||||
Passes
|
||||
TransformUtils
|
||||
)
|
||||
|
||||
add_llvm_unittest(LLVMFrontendTests
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Frontend/OpenMP/OMPConstants.h"
|
||||
#include "llvm/IR/Verifier.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -99,20 +100,18 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
|
|||
}
|
||||
|
||||
TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
|
||||
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
|
||||
OpenMPIRBuilder OMPBuilder(*M);
|
||||
OMPBuilder.initialize();
|
||||
|
||||
BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
|
||||
new UnreachableInst(Ctx, CBB);
|
||||
auto FiniCB = [CBB](llvm::OpenMPIRBuilder::InsertPointTy IP) {
|
||||
assert(IP.getBlock()->end() == IP.getPoint() &&
|
||||
"Clang CG should cause non-terminated block!");
|
||||
auto FiniCB = [&](InsertPointTy IP) {
|
||||
ASSERT_NE(IP.getBlock(), nullptr);
|
||||
ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
|
||||
BranchInst::Create(CBB, IP.getBlock());
|
||||
};
|
||||
// Emulate an outer parallel.
|
||||
llvm::OpenMPIRBuilder::FinalizationInfo FI(
|
||||
{FiniCB, OMPD_parallel, /* HasCancel */ true});
|
||||
OMPBuilder.pushFinalizationCB(std::move(FI));
|
||||
OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
|
||||
|
||||
IRBuilder<> Builder(BB);
|
||||
|
||||
|
@ -141,6 +140,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
|
|||
Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
|
||||
EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
|
||||
EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
|
||||
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
|
||||
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
|
||||
1U);
|
||||
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
|
||||
|
@ -188,4 +188,309 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) {
|
|||
return;
|
||||
EXPECT_EQ(SrcSrc->getAsCString(), ";test.dbg;foo;3;7;;");
|
||||
}
|
||||
|
||||
/// Check a parallel region without 'if' clause: the body is outlined into an
/// internal function that is only referenced by a __kmpc_fork_call, and the
/// single captured value (the function argument) is forwarded to it.
TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  AllocaInst *PrivAI = nullptr;

  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    ++NumBodiesGenerated;

    // A private copy of the argument, placed with the other region allocas.
    Builder.restoreIP(AllocaIP);
    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
    Builder.CreateStore(F->arg_begin(), PrivAI);

    Builder.restoreIP(CodeGenIP);
    Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);

    // Leave the body early from the "then" branch.
    Builder.SetInsertPoint(ThenTerm);
    Builder.CreateBr(&ContinuationIP);
    ThenTerm->eraseFromParent();
  };

  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
    ++NumPrivatizedVars;

    // Non-alloca values are passed through unchanged (=shared).
    if (!isa<AllocaInst>(VPtr)) {
      EXPECT_EQ(&VPtr, F->arg_begin());
      ReplacementValue = &VPtr;
      return CodeGenIP;
    }

    // Trivial copy (=firstprivate).
    Builder.restoreIP(AllocaIP);
    Type *VTy = VPtr.getType()->getPointerElementType();
    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(V, ReplacementValue);
    return CodeGenIP;
  };

  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
      Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PB_default, false);

  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 1U);
  EXPECT_EQ(NumFinalizationPoints, 1U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  // PrivAI is dereferenced below, so a missing body must abort the test.
  ASSERT_NE(PrivAI, nullptr);
  Function *OutlinedFn = PrivAI->getFunction();
  EXPECT_NE(F, OutlinedFn);
  EXPECT_FALSE(verifyModule(*M, &errs()));

  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
  EXPECT_EQ(OutlinedFn->arg_size(), 3U);

  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
  // user_back() requires at least one user, hence assert instead of expect.
  ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
  User *Usr = OutlinedFn->user_back();
  ASSERT_TRUE(isa<ConstantExpr>(Usr));
  ASSERT_EQ(Usr->getNumUses(), 1U);
  CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
  ASSERT_NE(ForkCI, nullptr);

  EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
  EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
  EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
  EXPECT_EQ(ForkCI->getArgOperand(1),
            ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
  EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
}
|
||||
|
||||
/// Check a parallel region with an 'if' clause: the outlined function is
/// referenced twice, once by __kmpc_fork_call and once by the direct call of
/// the serialized version.
TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  AllocaInst *PrivAI = nullptr;

  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    ++NumBodiesGenerated;

    // A private copy of the argument, placed with the other region allocas.
    Builder.restoreIP(AllocaIP);
    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
    Builder.CreateStore(F->arg_begin(), PrivAI);

    Builder.restoreIP(CodeGenIP);
    Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);

    // Leave the body early from the "then" branch.
    Builder.SetInsertPoint(ThenTerm);
    Builder.CreateBr(&ContinuationIP);
    ThenTerm->eraseFromParent();
  };

  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
    ++NumPrivatizedVars;

    // Non-alloca values are passed through unchanged (=shared).
    if (!isa<AllocaInst>(VPtr)) {
      EXPECT_EQ(&VPtr, F->arg_begin());
      ReplacementValue = &VPtr;
      return CodeGenIP;
    }

    // Trivial copy (=firstprivate).
    Builder.restoreIP(AllocaIP);
    Type *VTy = VPtr.getType()->getPointerElementType();
    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(V, ReplacementValue);
    return CodeGenIP;
  };

  auto FiniCB = [&](InsertPointTy CodeGenIP) {
    ++NumFinalizationPoints;
    // No destructors.
  };

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
      nullptr, OMP_PB_default, false);

  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 1U);
  EXPECT_EQ(NumFinalizationPoints, 1U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  // PrivAI is dereferenced below, so a missing body must abort the test.
  ASSERT_NE(PrivAI, nullptr);
  Function *OutlinedFn = PrivAI->getFunction();
  EXPECT_NE(F, OutlinedFn);
  EXPECT_FALSE(verifyModule(*M, &errs()));

  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
  EXPECT_EQ(OutlinedFn->arg_size(), 3U);

  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
  ASSERT_EQ(OutlinedFn->getNumUses(), 2U);

  // One user is the direct (serialized) call, the other one is the cast that
  // is passed to the fork call.
  CallInst *DirectCI = nullptr;
  CallInst *ForkCI = nullptr;
  for (User *Usr : OutlinedFn->users()) {
    if (isa<CallInst>(Usr)) {
      ASSERT_EQ(DirectCI, nullptr);
      DirectCI = cast<CallInst>(Usr);
    } else {
      ASSERT_TRUE(isa<ConstantExpr>(Usr));
      ASSERT_EQ(Usr->getNumUses(), 1U);
      ASSERT_TRUE(isa<CallInst>(Usr->user_back()));
      ForkCI = cast<CallInst>(Usr->user_back());
    }
  }

  // Both calls are dereferenced below, make sure the loop found them.
  ASSERT_NE(ForkCI, nullptr);
  ASSERT_NE(DirectCI, nullptr);

  EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
  EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
  EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
  EXPECT_EQ(ForkCI->getArgOperand(1),
            ConstantInt::get(Type::getInt32Ty(Ctx), 1));
  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());

  EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
  EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
  EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
  EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
  EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin());
}
|
||||
|
||||
/// Check barriers inside a cancellable parallel region: only the checked
/// cancel barrier triggers an additional finalization point, and every
/// finalization call is followed by a branch to the common exit block.
TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  CallInst *CheckedBarrier = nullptr;
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    ++NumBodiesGenerated;

    Builder.restoreIP(CodeGenIP);

    // Create three barriers, two cancel barriers but only one checked.
    Function *CBFn, *BFn;

    // First barrier: emitted as a cancel barrier whose result is used.
    Builder.restoreIP(
        OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel));

    CBFn = M->getFunction("__kmpc_cancel_barrier");
    BFn = M->getFunction("__kmpc_barrier");
    ASSERT_NE(CBFn, nullptr);
    ASSERT_EQ(BFn, nullptr);
    ASSERT_EQ(CBFn->getNumUses(), 1U);
    ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
    ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
    CheckedBarrier = cast<CallInst>(CBFn->user_back());

    // Second barrier: forced simple call, emitted as a plain __kmpc_barrier.
    Builder.restoreIP(
        OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel, true));
    CBFn = M->getFunction("__kmpc_cancel_barrier");
    BFn = M->getFunction("__kmpc_barrier");
    ASSERT_NE(CBFn, nullptr);
    ASSERT_NE(BFn, nullptr);
    ASSERT_EQ(CBFn->getNumUses(), 1U);
    ASSERT_EQ(BFn->getNumUses(), 1U);
    ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
    ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);

    // Third barrier: a cancel barrier whose result is not checked.
    Builder.restoreIP(OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel,
                                               false, false));
    ASSERT_EQ(CBFn->getNumUses(), 2U);
    ASSERT_EQ(BFn->getNumUses(), 1U);
    ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
    ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
    ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
  };

  auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V,
                    Value *&) -> InsertPointTy {
    ++NumPrivatizedVars;
    llvm_unreachable("No privatization callback call expected!");
  };

  FunctionType *FakeDestructorTy =
      FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
                        /*isVarArg=*/false);
  auto *FakeDestructor = Function::Create(
      FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());

  // Each finalization point emits one call so they can be counted in the IR.
  auto FiniCB = [&](InsertPointTy IP) {
    ++NumFinalizationPoints;
    Builder.restoreIP(IP);
    Builder.CreateCall(FakeDestructor,
                       {Builder.getInt32(NumFinalizationPoints)});
  };

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
      nullptr, OMP_PB_default, true);

  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 0U);
  EXPECT_EQ(NumFinalizationPoints, 2U);
  EXPECT_EQ(FakeDestructor->getNumUses(), 2U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  EXPECT_FALSE(verifyModule(*M, &errs()));

  // All finalization calls have to branch to the same exit block, which only
  // contains the return.
  BasicBlock *ExitBB = nullptr;
  for (const User *Usr : FakeDestructor->users()) {
    const CallInst *CI = dyn_cast<CallInst>(Usr);
    // dyn_cast yields null for non-call users; do not dereference blindly.
    ASSERT_NE(CI, nullptr);
    ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
    ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
    ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
    if (ExitBB)
      ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
    else
      ExitBB = CI->getNextNode()->getSuccessor(0);
    ASSERT_EQ(ExitBB->size(), 1U);
    ASSERT_TRUE(isa<ReturnInst>(ExitBB->front()));
  }
}
|
||||
|
||||
} // namespace
|
||||
|
|
Loading…
Reference in New Issue