[OpenMP][IRBuilder] Support nested parallel regions

During code generation we might change/add basic blocks so keeping a
list of them is fairly easy to break. Nested parallel regions were
enough. The new scheme does recompute the list of blocks to be outlined
once it is needed.

Reviewed By: anchu-rajendran

Differential Revision: https://reviews.llvm.org/D82722
This commit is contained in:
Johannes Doerfert 2020-06-28 13:41:33 -05:00
parent 64d99a1d04
commit 7af287d0d9
4 changed files with 158 additions and 31 deletions

View File

@ -175,7 +175,7 @@ for (int i = 0; i < argc; ++i) {
// IRBUILDER: define internal void @main
// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]]
// IRBUILDER-NEXT: ret void
// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
@ -192,10 +192,8 @@ for (int i = 0; i < argc; ++i) {
// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
// IRBUILDER: [[EXIT]]
// IRBUILDER: br label %[[EXIT2:.+]]
// IRBUILDER: [[CONTINUE]]
// IRBUILDER: br label %[[ELSE:.+]]
// IRBUILDER: [[EXIT2]]
// IRBUILDER: br label %[[RETURN]]
// IRBUILDER: [[CONTINUE]]
// IRBUILDER: br label %[[ELSE2:.+]]
#endif

View File

@ -0,0 +1,110 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o /tmp/t1 %s
// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch /tmp/t1 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
// expected-no-diagnostics
// TODO: Teach the update script to check new functions too.
#ifndef HEADER
#define HEADER
// ALL-LABEL: @_Z17nested_parallel_0v(
// ALL-NEXT: entry:
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
// ALL: omp_parallel:
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*))
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT12:%.*]]
// ALL: omp.par.outlined.exit12:
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// ALL: omp.par.exit.split:
// ALL-NEXT: ret void
//
void nested_parallel_0(void) {
#pragma omp parallel
{
#pragma omp parallel
{
}
}
}
// ALL-LABEL: @_Z17nested_parallel_1Pfid(
// ALL-NEXT: entry:
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
// ALL: omp_parallel:
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
// ALL: omp.par.outlined.exit13:
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// ALL: omp.par.exit.split:
// ALL-NEXT: ret void
//
void nested_parallel_1(float *r, int a, double b) {
#pragma omp parallel
{
#pragma omp parallel
{
*r = a + b;
}
}
}
// ALL-LABEL: @_Z17nested_parallel_2Pfid(
// ALL-NEXT: entry:
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
// ALL: omp_parallel:
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
// ALL: omp.par.outlined.exit55:
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
// ALL: omp.par.exit.split:
// ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
// ALL-NEXT: [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double
// ALL-NEXT: [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8
// ALL-NEXT: [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]]
// ALL-NEXT: [[CONV58:%.*]] = fptrunc double [[ADD57]] to float
// ALL-NEXT: [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8
// ALL-NEXT: store float [[CONV58]], float* [[TMP2]], align 4
// ALL-NEXT: ret void
//
void nested_parallel_2(float *r, int a, double b) {
#pragma omp parallel
{
*r = a + b;
#pragma omp parallel
{
*r = a + b;
#pragma omp parallel
{
*r = a + b;
}
*r = a + b;
#pragma omp parallel
{
*r = a + b;
}
*r = a + b;
}
*r = a + b;
}
*r = a + b;
}
#endif

View File

@ -285,9 +285,14 @@ public:
/// Helper that contains information about regions we need to outline
/// during finalization.
struct OutlineInfo {
SmallVector<BasicBlock *, 32> Blocks;
using PostOutlineCBTy = std::function<void(Function &)>;
PostOutlineCBTy PostOutlineCB;
BasicBlock *EntryBB, *ExitBB;
/// Collect all blocks in between EntryBB and ExitBB in both the given
/// vector and set.
void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
SmallVectorImpl<BasicBlock *> &BlockVector);
};
/// Collection of regions that need to be outlined during finalization.

View File

@ -127,13 +127,16 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
void OpenMPIRBuilder::finalize() {
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> Blocks;
for (OutlineInfo &OI : OutlineInfos) {
assert(!OI.Blocks.empty() &&
"Outlined regions should have at least a single block!");
BasicBlock *RegEntryBB = OI.Blocks.front();
Function *OuterFn = RegEntryBB->getParent();
ParallelRegionBlockSet.clear();
Blocks.clear();
OI.collectBlocks(ParallelRegionBlockSet, Blocks);
Function *OuterFn = OI.EntryBB->getParent();
CodeExtractorAnalysisCache CEAC(*OuterFn);
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ false,
/* BlockFrequencyInfo */ nullptr,
/* BranchProbabilityInfo */ nullptr,
@ -143,6 +146,8 @@ void OpenMPIRBuilder::finalize() {
/* Suffix */ ".omp_par");
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
<< " Exit: " << OI.ExitBB->getName() << "\n");
assert(Extractor.isEligible() &&
"Expected OpenMP outlining to be possible!");
@ -162,12 +167,12 @@ void OpenMPIRBuilder::finalize() {
// made our own entry block after all.
{
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
RegEntryBB->moveBefore(&ArtificialEntry);
assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
OI.EntryBB->moveBefore(&ArtificialEntry);
ArtificialEntry.eraseFromParent();
}
assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
// Run a user callback, e.g. to add attributes.
@ -614,20 +619,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
FiniCB(PreFiniIP);
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> Worklist;
ParallelRegionBlockSet.insert(PRegEntryBB);
ParallelRegionBlockSet.insert(PRegExitBB);
OI.EntryBB = PRegEntryBB;
OI.ExitBB = PRegExitBB;
// Collect all blocks in-between PRegEntryBB and PRegExitBB.
Worklist.push_back(PRegEntryBB);
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
OI.Blocks.push_back(BB);
for (BasicBlock *SuccBB : successors(BB))
if (ParallelRegionBlockSet.insert(SuccBB).second)
Worklist.push_back(SuccBB);
}
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> Blocks;
OI.collectBlocks(ParallelRegionBlockSet, Blocks);
// Ensure a single exit node for the outlined region by creating one.
// We might have multiple incoming edges to the exit now due to finalizations,
@ -635,10 +632,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
BasicBlock *PRegOutlinedExitBB = PRegExitBB;
PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
PRegOutlinedExitBB->setName("omp.par.outlined.exit");
OI.Blocks.push_back(PRegOutlinedExitBB);
Blocks.push_back(PRegOutlinedExitBB);
CodeExtractorAnalysisCache CEAC(*OuterFn);
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ false,
/* BlockFrequencyInfo */ nullptr,
/* BranchProbabilityInfo */ nullptr,
@ -694,7 +691,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
LLVM_DEBUG({
for (auto *BB : OI.Blocks)
for (auto *BB : Blocks)
dbgs() << " PBR: " << BB->getName() << "\n";
});
@ -1112,3 +1109,20 @@ void OpenMPIRBuilder::initializeTypes(Module &M) {
VarName##Ptr = PointerType::getUnqual(T);
#include "llvm/Frontend/OpenMP/OMPKinds.def"
}
void OpenMPIRBuilder::OutlineInfo::collectBlocks(
SmallPtrSetImpl<BasicBlock *> &BlockSet,
SmallVectorImpl<BasicBlock *> &BlockVector) {
SmallVector<BasicBlock *, 32> Worklist;
BlockSet.insert(EntryBB);
BlockSet.insert(ExitBB);
Worklist.push_back(EntryBB);
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
BlockVector.push_back(BB);
for (BasicBlock *SuccBB : successors(BB))
if (BlockSet.insert(SuccBB).second)
Worklist.push_back(SuccBB);
}
}