[OpenMP][IR-Builder] Introduce "pragma omp parallel" code generation

This patch combines the `emitParallel` logic prototyped in D61953 with
the OpenMPIRBuilder (D69785) and introduces `CreateParallel`.

Reviewed By: fghanim

Differential Revision: https://reviews.llvm.org/D70109
This commit is contained in:
Johannes Doerfert 2019-12-25 16:59:38 -06:00
parent 2e6c15d1e7
commit e4add9727b
6 changed files with 724 additions and 14 deletions

View File

@ -49,6 +49,16 @@ enum class RuntimeFunction {
#define OMP_RTL(Enum, ...) constexpr auto Enum = omp::RuntimeFunction::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
/// IDs for the different proc bind kinds.
///
/// The enumerators are not spelled out here: every
/// OMP_PROC_BIND_KIND(Enum, Str, Value) entry of the included .def file is
/// expanded to `Enum = Value,`. The string argument is ignored by this
/// expansion.
enum class ProcBindKind {
#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
#include "llvm/Frontend/OpenMP/OMPKinds.def"
};
// Expand the proc bind entries a second time, now as constants, so that each
// enumerator can be referred to by its bare name (e.g., `OMP_PB_default`)
// instead of `omp::ProcBindKind::<Enum>`. Only the first macro argument is
// used; the remaining ones are swallowed by the variadic parameter.
#define OMP_PROC_BIND_KIND(Enum, ...) \
constexpr auto Enum = omp::ProcBindKind::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
/// IDs for all omp runtime library ident_t flag encodings (see
/// their definition in openmp/runtime/src/kmp.h).
enum class IdentFlag {
@ -67,8 +77,8 @@ Directive getOpenMPDirectiveKind(StringRef Str);
StringRef getOpenMPDirectiveName(Directive D);
/// Forward declarations for LLVM-IR types (simple, function and structure) are
/// generated below. Their names are defined and used in OpenMPKinds.def. Here
/// we provide the forward declarations, the initializeTypes function will
/// generated below. Their names are defined and used in OpenMP/OMPKinds.def.
/// Here we provide the forward declarations, the initializeTypes function will
/// provide the values.
///
///{
@ -83,10 +93,10 @@ namespace types {
extern PointerType *VarName##Ptr;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
/// Helper to initialize all types defined in OpenMPKinds.def.
/// Helper to initialize all types defined in OpenMP/OMPKinds.def.
void initializeTypes(Module &M);
/// Helper to uninitialize all types defined in OpenMPKinds.def.
/// Helper to uninitialize all types defined in OpenMP/OMPKinds.def.
void uninitializeTypes();
} // namespace types

View File

@ -75,6 +75,40 @@ public:
/// NOTE: Temporary solution until Clang CG is gone.
void popFinalizationCB() { FinalizationStack.pop_back(); }
/// Callback type for body (=inner region) code generation.
///
/// The callback takes code locations as arguments, each describing a
/// location at which code might need to be generated or a location that is
/// the target of control transfer. The callback itself returns nothing.
///
/// \param AllocaIP is the insertion point at which new alloca instructions
/// should be placed.
/// \param CodeGenIP is the insertion point at which the body code should be
/// placed.
/// \param ContinuationBB is the basic block target to leave the body.
///
/// Note that all blocks pointed to by the arguments have terminators.
///
/// When invoked by CreateParallel, \p CodeGenIP is the beginning of the
/// region body block and \p ContinuationBB is the pre-finalization block.
using BodyGenCallbackTy = function_ref<void(
InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
BasicBlock & /* ContinuationBB */)>;
/// Callback type for variable privatization (think copy & default
/// constructor).
///
/// \param AllocaIP is the insertion point at which new alloca instructions
/// should be placed.
/// \param CodeGenIP is the insertion point at which the privatization code
/// should be placed.
/// \param Val The value being copied/created.
/// \param ReplVal The replacement value, thus a copy or newly created version
/// of \p Val. This is an output argument: CreateParallel passes it in as
/// null and asserts that the callback has set it. Setting it to \p Val
/// itself leaves the uses of \p Val inside the region untouched.
///
/// \returns The new insertion point where code generation continues and
/// \p ReplVal the replacement of \p Val.
using PrivatizeCallbackTy = function_ref<InsertPointTy(
InsertPointTy /* AllocaIP */, InsertPointTy /* CodeGenIP */,
Value & /* Val */, Value *& /* ReplVal */)>;
/// Description of a LLVM-IR insertion point (IP) and a debug/source location
/// (filename, line, column, ...).
struct LocationDescription {
@ -105,6 +139,24 @@ public:
bool ForceSimpleCall = false,
bool CheckCancelFlag = true);
/// Generator for '#omp parallel'
///
/// The region body is generated through \p BodyGenCB, outlined into a
/// separate function, and invoked through a call to __kmpc_fork_call.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code. Must not be
///                  empty.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
/// \param IfCondition The evaluated 'if' clause expression, if any. If null,
///                    no serialized version of the region is generated.
/// \param NumThreads The evaluated 'num_threads' clause expression, if any.
///                   If null, no __kmpc_push_num_threads call is generated;
///                   otherwise the value is cast to a 32-bit integer.
/// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
///                 For OMP_PB_default no __kmpc_push_proc_bind call is
///                 generated.
/// \param IsCancellable Flag to indicate a cancellable parallel region.
///
/// \returns The insertion position *after* the parallel. If the builder
///          cannot be updated to \p Loc, the insertion point stored in
///          \p Loc is returned and no parallel region is generated.
IRBuilder<>::InsertPoint
CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
Value *IfCondition, Value *NumThreads,
omp::ProcBindKind ProcBind, bool IsCancellable);
///}
private:

View File

@ -167,6 +167,11 @@ __OMP_RTL(__kmpc_barrier, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_cancel_barrier, false, Int32, IdentPtr, Int32)
__OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr)
__OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr)
__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */Int32)
__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */Int32)
__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(omp_get_thread_num, false, Int32, )
#undef __OMP_RTL
@ -234,3 +239,26 @@ __OMP_IDENT_FLAG(BARRIER_IMPL_WORKSHARE, 0x01C0)
#undef OMP_IDENT_FLAG
///}
/// Proc bind kinds
///
/// Each entry below is handed to OMP_PROC_BIND_KIND as
/// (OMP_PB_<Name>, "<Name>", <Value>). Including files define
/// OMP_PROC_BIND_KIND beforehand to pick the expansion they need.
///
///{

// If the including file did not request the proc bind entries, provide an
// empty definition so that they expand to nothing.
#ifndef OMP_PROC_BIND_KIND
#define OMP_PROC_BIND_KIND(Enum, Str, Value)
#endif

// Helper that derives the prefixed enumerator name and the string form from a
// single name token.
#define __OMP_PROC_BIND_KIND(Name, Value) \
OMP_PROC_BIND_KIND(OMP_PB_##Name, #Name, Value)

__OMP_PROC_BIND_KIND(master, 2)
__OMP_PROC_BIND_KIND(close, 3)
__OMP_PROC_BIND_KIND(spread, 4)
__OMP_PROC_BIND_KIND(default, 6)
__OMP_PROC_BIND_KIND(unknown, 7)

// Both macros are undefined again, so OMP_PROC_BIND_KIND has to be defined
// anew before every inclusion that wants these entries.
#undef __OMP_PROC_BIND_KIND
#undef OMP_PROC_BIND_KIND

///}

View File

@ -16,10 +16,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <sstream>
@ -216,8 +219,17 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
if (UseCancelBarrier && CheckCancelFlag) {
// For a cancel barrier we create two new blocks.
BasicBlock *BB = Builder.GetInsertBlock();
BasicBlock *NonCancellationBlock = BasicBlock::Create(
BB->getContext(), BB->getName() + ".cont", BB->getParent());
BasicBlock *NonCancellationBlock;
if (Builder.GetInsertPoint() == BB->end()) {
// TODO: This branch will not be needed once we moved to the
// OpenMPIRBuilder codegen completely.
NonCancellationBlock = BasicBlock::Create(
BB->getContext(), BB->getName() + ".cont", BB->getParent());
} else {
NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
BB->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(BB);
}
BasicBlock *CancellationBlock = BasicBlock::Create(
BB->getContext(), BB->getName() + ".cncl", BB->getParent());
@ -233,8 +245,310 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
FI.FiniCB(Builder.saveIP());
// The continuation block is where code generation continues.
Builder.SetInsertPoint(NonCancellationBlock);
Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
return Builder.saveIP();
}
/// Generate an OpenMP parallel region at \p Loc.
///
/// The region is first modeled inline in the current function as a chain of
/// blocks (entry -> body -> pre-finalize -> exit), the body is created by
/// \p BodyGenCB, values flowing into/out of the region are handed to
/// \p PrivCB, and the blocks are then outlined with the CodeExtractor. The
/// call to the outlined function produced by the extractor is replaced by
/// (or, with an 'if' clause, complemented by) a call to __kmpc_fork_call.
///
/// \param Loc           Insertion point and source location of the directive.
/// \param BodyGenCB     Callback generating the region body; must not be empty.
/// \param PrivCB        Callback invoked for each value captured by the region
///                      (except the thread id and the modeling allocas).
/// \param FiniCB        Callback invoked at each finalization point.
/// \param IfCondition   Optional 'if' clause value; if given, a serialized
///                      version of the region is emitted in the else branch.
/// \param NumThreads    Optional 'num_threads' clause value.
/// \param ProcBind      'proc_bind' clause value; OMP_PB_default emits nothing.
/// \param IsCancellable Recorded on the finalization stack for this region.
///
/// \returns The insertion point at the end of the block that follows the
///          parallel region, or Loc.IP if updateToLocation(Loc) fails.
IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
    Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadID = getOrCreateThreadID(Ident);

  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunction(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PB_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(getOrCreateRuntimeFunction(OMPRTL___kmpc_push_proc_bind),
                       Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  Builder.SetInsertPoint(OuterFn->getEntryBlock().getFirstNonPHI());
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
  // program, otherwise we only need them for modeling purposes to get the
  // associated arguments in the outlined function. In the former case,
  // initialize the allocas properly, in the latter case, delete them later.
  if (IfCondition) {
    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  } else {
    ToBeDeleted.push_back(TIDAddr);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  // Without an 'if' clause the region is modeled directly in front of UI,
  // otherwise in front of the terminator of the "then" branch.
  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);

  BasicBlock *ThenBB = ThenTI->getParent();
  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  BasicBlock *PRegBodyBB =
      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB =
      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  InsertPointTy AllocaIP(PRegEntryBB,
                         PRegEntryBB->getTerminator()->getIterator());
  Builder.restoreIP(AllocaIP);
  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(TIDAddr, "tid.addr.use"));
  ToBeDeleted.push_back(Builder.CreateLoad(ZeroAddr, "zero.addr.use"));

  // ThenBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *UI->getFunction() << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(AllocaIP, CodeGenIP, *PRegPreFiniBB);

  LLVM_DEBUG(dbgs() << "After  body codegen: " << *UI->getFunction() << "\n");

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> ParallelRegionBlocks, Worklist;
  ParallelRegionBlockSet.insert(PRegEntryBB);
  ParallelRegionBlockSet.insert(PRegExitBB);

  // Collect all blocks in-between PRegEntryBB and PRegExitBB. The exit block
  // was put into the set up front, hence the walk never continues past it.
  Worklist.push_back(PRegEntryBB);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    ParallelRegionBlocks.push_back(BB);
    for (BasicBlock *SuccBB : successors(BB))
      if (ParallelRegionBlockSet.insert(SuccBB).second)
        Worklist.push_back(SuccBB);
  }

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(ParallelRegionBlocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *UI->getFunction() << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunction(OMPRTL___kmpc_global_thread_num);

  auto PrivHelper = [&](Value &V) {
    // The modeling allocas become the first two arguments of the outlined
    // function and are never privatized.
    if (&V == TIDAddr || &V == ZeroAddr)
      return;

    // Remember the uses inside the region before the callback gets a chance
    // to introduce new ones.
    SmallVector<Use *, 8> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.push_back(&U);

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      // A thread id queried outside the region is replaced by the thread id
      // loaded from the local copy of the outlined function's argument.
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(AllocaIP, Builder.saveIP(), V, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  for (Value *Output : Outputs) {
    LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
    PrivHelper(*Output);
  }

  LLVM_DEBUG(dbgs() << "After  privatization: " << *UI->getFunction() << "\n");
  LLVM_DEBUG({
    for (auto *BB : ParallelRegionBlocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
  // The extractor signals failure with a null function. Check for it before
  // the result is dereferenced by the debug output and the code below.
  assert(OutlinedFn && "Expected the parallel region to be outlined!");

  LLVM_DEBUG(dbgs() << "After      outlining: " << *UI->getFunction() << "\n");
  LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");

  // Remove the artificial entry introduced by the extractor right away, we
  // made our own entry block after all.
  {
    BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
    assert(ArtificialEntry.getUniqueSuccessor() == PRegEntryBB);
    assert(PRegEntryBB->getUniquePredecessor() == &ArtificialEntry);
    PRegEntryBB->moveBefore(&ArtificialEntry);
    ArtificialEntry.eraseFromParent();
  }
  LLVM_DEBUG(dbgs() << "PP Outlined function: " << *OutlinedFn << "\n");
  assert(&OutlinedFn->getEntryBlock() == PRegEntryBB);

  assert(OutlinedFn->getNumUses() == 1 &&
         "Expected a single use of the outlined function!");
  assert(OutlinedFn->arg_size() >= 2 &&
         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn->arg_size() - /* tid & bounded tid */ 2;

  // The only user at this point is the call the extractor left behind.
  CallInst *CI = cast<CallInst>(OutlinedFn->user_back());
  CI->getParent()->setName("omp_parallel");
  Builder.SetInsertPoint(CI);

  // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
                           Builder.CreateBitCast(OutlinedFn, ParallelTaskPtr)};

  SmallVector<Value *, 16> RealArgs;
  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

  FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
  Builder.CreateCall(RTLFn, RealArgs);

  LLVM_DEBUG(dbgs() << "With fork_call placed: "
                    << *Builder.GetInsertBlock()->getParent() << "\n");

  // Compute the resulting insertion point before the artificial terminator
  // is removed.
  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
  UI->eraseFromParent();

  // Initialize the local TID stack location with the argument value.
  Builder.SetInsertPoint(PrivTID);
  Function::arg_iterator OutlinedAI = OutlinedFn->arg_begin();
  Builder.CreateStore(Builder.CreateLoad(OutlinedAI), PrivTIDAddr);

  // If no "if" clause was present we do not need the call created during
  // outlining, otherwise we reuse it in the serialized parallel region.
  if (!ElseTI) {
    CI->eraseFromParent();
  } else {

    // If an "if" clause was present we are now generating the serialized
    // version into the "else" branch.
    Builder.SetInsertPoint(ElseTI);

    // Build calls __kmpc_serialized_parallel(&Ident, GTid);
    Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
    Builder.CreateCall(
        getOrCreateRuntimeFunction(OMPRTL___kmpc_serialized_parallel),
        SerializedParallelCallArgs);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    CI->removeFromParent();
    Builder.Insert(CI);

    // __kmpc_end_serialized_parallel(&Ident, GTid);
    Value *EndArgs[] = {Ident, ThreadID};
    Builder.CreateCall(
        getOrCreateRuntimeFunction(OMPRTL___kmpc_end_serialized_parallel),
        EndArgs);

    LLVM_DEBUG(dbgs() << "With serialized parallel region: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");
  }

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini block
  // and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PreFiniTI = PRegPreFiniBB->getTerminator();
  assert(PreFiniTI->getNumSuccessors() == 1 &&
         PreFiniTI->getSuccessor(0)->size() == 1 &&
         isa<ReturnInst>(PreFiniTI->getSuccessor(0)->getTerminator()) &&
         "Unexpected CFG structure!");

  InsertPointTy PreFiniIP(PRegPreFiniBB, PreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  // Drop the instructions that only existed to model the region.
  for (Instruction *I : ToBeDeleted)
    I->eraseFromParent();

  return AfterIP;
}

View File

@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS
FrontendOpenMP
Support
Passes
TransformUtils
)
add_llvm_unittest(LLVMFrontendTests

View File

@ -14,6 +14,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gtest/gtest.h"
using namespace llvm;
@ -99,20 +100,18 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
}
TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();
BasicBlock *CBB = BasicBlock::Create(Ctx, "", F);
new UnreachableInst(Ctx, CBB);
auto FiniCB = [CBB](llvm::OpenMPIRBuilder::InsertPointTy IP) {
assert(IP.getBlock()->end() == IP.getPoint() &&
"Clang CG should cause non-terminated block!");
auto FiniCB = [&](InsertPointTy IP) {
ASSERT_NE(IP.getBlock(), nullptr);
ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
BranchInst::Create(CBB, IP.getBlock());
};
// Emulate an outer parallel.
llvm::OpenMPIRBuilder::FinalizationInfo FI(
{FiniCB, OMPD_parallel, /* HasCancel */ true});
OMPBuilder.pushFinalizationCB(std::move(FI));
OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
IRBuilder<> Builder(BB);
@ -141,6 +140,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
1U);
EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
@ -188,4 +188,309 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) {
return;
EXPECT_EQ(SrcSrc->getAsCString(), ";test.dbg;foo;3;7;;");
}
// Create a parallel region without 'if', 'num_threads', or 'proc_bind' clause
// and verify the outlined function as well as the __kmpc_fork_call.
TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  AllocaInst *PrivAI = nullptr;

  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  // The body stores the function argument into a fresh alloca, reloads it,
  // and branches on the comparison of the two values.
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    ++NumBodiesGenerated;

    Builder.restoreIP(AllocaIP);
    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
    Builder.CreateStore(F->arg_begin(), PrivAI);

    Builder.restoreIP(CodeGenIP);
    Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);

    // Let the "then" branch leave the body through the continuation block.
    Builder.SetInsertPoint(ThenTerm);
    Builder.CreateBr(&ContinuationIP);
    ThenTerm->eraseFromParent();
  };

  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
    ++NumPrivatizedVars;

    // Non-alloca values are passed through unchanged.
    if (!isa<AllocaInst>(VPtr)) {
      EXPECT_EQ(&VPtr, F->arg_begin());
      ReplacementValue = &VPtr;
      return CodeGenIP;
    }

    // Trivial copy (=firstprivate).
    Builder.restoreIP(AllocaIP);
    Type *VTy = VPtr.getType()->getPointerElementType();
    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(V, ReplacementValue);
    return CodeGenIP;
  };

  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
      Loc, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PB_default, false);

  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 1U);
  EXPECT_EQ(NumFinalizationPoints, 1U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  // PrivAI is dereferenced right away, the check has to be fatal.
  ASSERT_NE(PrivAI, nullptr);
  Function *OutlinedFn = PrivAI->getFunction();
  EXPECT_NE(F, OutlinedFn);
  EXPECT_FALSE(verifyModule(*M));
  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
  EXPECT_EQ(OutlinedFn->arg_size(), 3U);

  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());

  // user_back() requires a user, make the use count checks fatal.
  ASSERT_EQ(OutlinedFn->getNumUses(), 1U);
  User *Usr = OutlinedFn->user_back();
  ASSERT_TRUE(isa<ConstantExpr>(Usr));
  ASSERT_EQ(Usr->getNumUses(), 1U);
  CallInst *ForkCI = dyn_cast<CallInst>(Usr->user_back());
  ASSERT_NE(ForkCI, nullptr);

  EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
  EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
  EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
  EXPECT_EQ(ForkCI->getArgOperand(1),
            ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
  EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());
}
// Create a parallel region with an 'if' clause and verify that the outlined
// function is used by both the __kmpc_fork_call and a direct (serialized) call.
TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  AllocaInst *PrivAI = nullptr;

  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  // The body stores the function argument into a fresh alloca, reloads it,
  // and branches on the comparison of the two values.
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    ++NumBodiesGenerated;

    Builder.restoreIP(AllocaIP);
    PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
    Builder.CreateStore(F->arg_begin(), PrivAI);

    Builder.restoreIP(CodeGenIP);
    Value *PrivLoad = Builder.CreateLoad(PrivAI, "local.use");
    Value *Cmp = Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
    Instruction *ThenTerm, *ElseTerm;
    SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                  &ThenTerm, &ElseTerm);

    // Let the "then" branch leave the body through the continuation block.
    Builder.SetInsertPoint(ThenTerm);
    Builder.CreateBr(&ContinuationIP);
    ThenTerm->eraseFromParent();
  };

  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    Value &VPtr, Value *&ReplacementValue) -> InsertPointTy {
    ++NumPrivatizedVars;

    // Non-alloca values are passed through unchanged.
    if (!isa<AllocaInst>(VPtr)) {
      EXPECT_EQ(&VPtr, F->arg_begin());
      ReplacementValue = &VPtr;
      return CodeGenIP;
    }

    // Trivial copy (=firstprivate).
    Builder.restoreIP(AllocaIP);
    Type *VTy = VPtr.getType()->getPointerElementType();
    Value *V = Builder.CreateLoad(VTy, &VPtr, VPtr.getName() + ".reload");
    ReplacementValue = Builder.CreateAlloca(VTy, 0, VPtr.getName() + ".copy");
    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(V, ReplacementValue);
    return CodeGenIP;
  };

  auto FiniCB = [&](InsertPointTy CodeGenIP) {
    ++NumFinalizationPoints;
    // No destructors.
  };

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
      nullptr, OMP_PB_default, false);

  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 1U);
  EXPECT_EQ(NumFinalizationPoints, 1U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  // PrivAI is dereferenced right away, the check has to be fatal.
  ASSERT_NE(PrivAI, nullptr);
  Function *OutlinedFn = PrivAI->getFunction();
  EXPECT_NE(F, OutlinedFn);
  EXPECT_FALSE(verifyModule(*M, &errs()));

  EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
  EXPECT_EQ(OutlinedFn->arg_size(), 3U);

  EXPECT_EQ(&OutlinedFn->getEntryBlock(), PrivAI->getParent());
  ASSERT_EQ(OutlinedFn->getNumUses(), 2U);

  // One user is the direct call in the serialized version, the other one is
  // the cast constant passed to __kmpc_fork_call.
  CallInst *DirectCI = nullptr;
  CallInst *ForkCI = nullptr;
  for (User *Usr : OutlinedFn->users()) {
    if (isa<CallInst>(Usr)) {
      ASSERT_EQ(DirectCI, nullptr);
      DirectCI = cast<CallInst>(Usr);
    } else {
      ASSERT_TRUE(isa<ConstantExpr>(Usr));
      ASSERT_EQ(Usr->getNumUses(), 1U);
      ASSERT_TRUE(isa<CallInst>(Usr->user_back()));
      ForkCI = cast<CallInst>(Usr->user_back());
    }
  }

  // Both calls are dereferenced below, make sure the loop found them.
  ASSERT_NE(DirectCI, nullptr);
  ASSERT_NE(ForkCI, nullptr);

  EXPECT_EQ(ForkCI->getCalledFunction()->getName(), "__kmpc_fork_call");
  EXPECT_EQ(ForkCI->getNumArgOperands(), 4U);
  EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
  EXPECT_EQ(ForkCI->getArgOperand(1),
            ConstantInt::get(Type::getInt32Ty(Ctx), 1));
  EXPECT_EQ(ForkCI->getArgOperand(3), F->arg_begin());

  EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
  EXPECT_EQ(DirectCI->getNumArgOperands(), 3U);
  EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
  EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
  EXPECT_EQ(DirectCI->getArgOperand(2), F->arg_begin());
}
// Create a cancellable parallel region containing three barriers and verify
// the finalization code generated for the checked cancel barrier and for the
// regular region exit.
TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  unsigned NumBodiesGenerated = 0;
  unsigned NumPrivatizedVars = 0;
  unsigned NumFinalizationPoints = 0;

  CallInst *CheckedBarrier = nullptr;
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    ++NumBodiesGenerated;

    Builder.restoreIP(CodeGenIP);

    // Create three barriers, two cancel barriers but only one checked.
    Function *CBFn, *BFn;

    // First barrier: a cancel barrier whose result is used.
    Builder.restoreIP(
        OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel));

    CBFn = M->getFunction("__kmpc_cancel_barrier");
    BFn = M->getFunction("__kmpc_barrier");
    ASSERT_NE(CBFn, nullptr);
    ASSERT_EQ(BFn, nullptr);
    ASSERT_EQ(CBFn->getNumUses(), 1U);
    ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
    ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
    CheckedBarrier = cast<CallInst>(CBFn->user_back());

    // Second barrier: forced to be a simple __kmpc_barrier call.
    Builder.restoreIP(
        OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel, true));
    CBFn = M->getFunction("__kmpc_cancel_barrier");
    BFn = M->getFunction("__kmpc_barrier");
    ASSERT_NE(CBFn, nullptr);
    ASSERT_NE(BFn, nullptr);
    ASSERT_EQ(CBFn->getNumUses(), 1U);
    ASSERT_EQ(BFn->getNumUses(), 1U);
    ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
    ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);

    // Third barrier: a cancel barrier whose result is not checked.
    Builder.restoreIP(OMPBuilder.CreateBarrier(Builder.saveIP(), OMPD_parallel,
                                               false, false));
    ASSERT_EQ(CBFn->getNumUses(), 2U);
    ASSERT_EQ(BFn->getNumUses(), 1U);
    ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
    ASSERT_TRUE(isa<CallInst>(CBFn->user_back()));
    ASSERT_EQ(CBFn->user_back()->getNumUses(), 0U);
  };

  auto PrivCB = [&](InsertPointTy, InsertPointTy, Value &V,
                    Value *&) -> InsertPointTy {
    ++NumPrivatizedVars;
    llvm_unreachable("No privatization callback call expected!");
  };

  FunctionType *FakeDestructorTy =
      FunctionType::get(Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)},
                        /*isVarArg=*/false);
  auto *FakeDestructor = Function::Create(
      FakeDestructorTy, Function::ExternalLinkage, "fakeDestructor", M.get());

  // Every finalization point calls the fake destructor with a running index.
  auto FiniCB = [&](InsertPointTy IP) {
    ++NumFinalizationPoints;
    Builder.restoreIP(IP);
    Builder.CreateCall(FakeDestructor,
                       {Builder.getInt32(NumFinalizationPoints)});
  };

  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.CreateParallel(
      Loc, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
      nullptr, OMP_PB_default, true);

  EXPECT_EQ(NumBodiesGenerated, 1U);
  EXPECT_EQ(NumPrivatizedVars, 0U);
  EXPECT_EQ(NumFinalizationPoints, 2U);
  EXPECT_EQ(FakeDestructor->getNumUses(), 2U);

  Builder.restoreIP(AfterIP);
  Builder.CreateRetVoid();

  EXPECT_FALSE(verifyModule(*M, &errs()));

  // All destructor calls have to be followed by a branch to the same exit
  // block which contains nothing but a return.
  BasicBlock *ExitBB = nullptr;
  for (const User *Usr : FakeDestructor->users()) {
    const CallInst *CI = dyn_cast<CallInst>(Usr);
    // dyn_cast yields null for non-call users, do not dereference blindly.
    ASSERT_NE(CI, nullptr);
    ASSERT_EQ(CI->getCalledFunction(), FakeDestructor);
    ASSERT_TRUE(isa<BranchInst>(CI->getNextNode()));
    ASSERT_EQ(CI->getNextNode()->getNumSuccessors(), 1U);
    if (ExitBB)
      ASSERT_EQ(CI->getNextNode()->getSuccessor(0), ExitBB);
    else
      ExitBB = CI->getNextNode()->getSuccessor(0);
    ASSERT_EQ(ExitBB->size(), 1U);
    ASSERT_TRUE(isa<ReturnInst>(ExitBB->front()));
  }
}
} // namespace