forked from OSchip/llvm-project
[OpenMP][IRBuilder] Support nested parallel regions
During code generation we might change/add basic blocks so keeping a list of them is fairly easy to break. Nested parallel regions were enough. The new scheme does recompute the list of blocks to be outlined once it is needed. Reviewed By: anchu-rajendran Differential Revision: https://reviews.llvm.org/D82722
This commit is contained in:
parent
64d99a1d04
commit
7af287d0d9
|
@ -175,7 +175,7 @@ for (int i = 0; i < argc; ++i) {
|
|||
|
||||
// IRBUILDER: define internal void @main
|
||||
|
||||
// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
|
||||
// IRBUILDER: [[RETURN:omp.par.outlined.exit[^:]*]]
|
||||
// IRBUILDER-NEXT: ret void
|
||||
// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
|
||||
|
||||
|
@ -192,10 +192,8 @@ for (int i = 0; i < argc; ++i) {
|
|||
// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
|
||||
// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
|
||||
// IRBUILDER: [[EXIT]]
|
||||
// IRBUILDER: br label %[[EXIT2:.+]]
|
||||
// IRBUILDER: [[CONTINUE]]
|
||||
// IRBUILDER: br label %[[ELSE:.+]]
|
||||
// IRBUILDER: [[EXIT2]]
|
||||
// IRBUILDER: br label %[[RETURN]]
|
||||
// IRBUILDER: [[CONTINUE]]
|
||||
// IRBUILDER: br label %[[ELSE2:.+]]
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
|
||||
// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o /tmp/t1 %s
|
||||
// %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch /tmp/t1 -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
|
||||
|
||||
// expected-no-diagnostics
|
||||
|
||||
// TODO: Teach the update script to check new functions too.
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// ALL-LABEL: @_Z17nested_parallel_0v(
|
||||
// ALL-NEXT: entry:
|
||||
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||
// ALL: omp_parallel:
|
||||
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z17nested_parallel_0v..omp_par.1 to void (i32*, i32*, ...)*))
|
||||
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT12:%.*]]
|
||||
// ALL: omp.par.outlined.exit12:
|
||||
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||
// ALL: omp.par.exit.split:
|
||||
// ALL-NEXT: ret void
|
||||
//
|
||||
void nested_parallel_0(void) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ALL-LABEL: @_Z17nested_parallel_1Pfid(
|
||||
// ALL-NEXT: entry:
|
||||
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
||||
// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
||||
// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
||||
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||
// ALL: omp_parallel:
|
||||
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
|
||||
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
|
||||
// ALL: omp.par.outlined.exit13:
|
||||
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||
// ALL: omp.par.exit.split:
|
||||
// ALL-NEXT: ret void
|
||||
//
|
||||
void nested_parallel_1(float *r, int a, double b) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
*r = a + b;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ALL-LABEL: @_Z17nested_parallel_2Pfid(
|
||||
// ALL-NEXT: entry:
|
||||
// ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8
|
||||
// ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
|
||||
// ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
|
||||
// ALL-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8
|
||||
// ALL-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
|
||||
// ALL-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
|
||||
// ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
|
||||
// ALL-NEXT: br label [[OMP_PARALLEL:%.*]]
|
||||
// ALL: omp_parallel:
|
||||
// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
|
||||
// ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
|
||||
// ALL: omp.par.outlined.exit55:
|
||||
// ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]]
|
||||
// ALL: omp.par.exit.split:
|
||||
// ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
|
||||
// ALL-NEXT: [[CONV56:%.*]] = sitofp i32 [[TMP0]] to double
|
||||
// ALL-NEXT: [[TMP1:%.*]] = load double, double* [[B_ADDR]], align 8
|
||||
// ALL-NEXT: [[ADD57:%.*]] = fadd double [[CONV56]], [[TMP1]]
|
||||
// ALL-NEXT: [[CONV58:%.*]] = fptrunc double [[ADD57]] to float
|
||||
// ALL-NEXT: [[TMP2:%.*]] = load float*, float** [[R_ADDR]], align 8
|
||||
// ALL-NEXT: store float [[CONV58]], float* [[TMP2]], align 4
|
||||
// ALL-NEXT: ret void
|
||||
//
|
||||
void nested_parallel_2(float *r, int a, double b) {
|
||||
#pragma omp parallel
|
||||
{
|
||||
*r = a + b;
|
||||
#pragma omp parallel
|
||||
{
|
||||
*r = a + b;
|
||||
#pragma omp parallel
|
||||
{
|
||||
*r = a + b;
|
||||
}
|
||||
*r = a + b;
|
||||
#pragma omp parallel
|
||||
{
|
||||
*r = a + b;
|
||||
}
|
||||
*r = a + b;
|
||||
}
|
||||
*r = a + b;
|
||||
}
|
||||
*r = a + b;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -285,9 +285,14 @@ public:
|
|||
/// Helper that contains information about regions we need to outline
|
||||
/// during finalization.
|
||||
struct OutlineInfo {
|
||||
SmallVector<BasicBlock *, 32> Blocks;
|
||||
using PostOutlineCBTy = std::function<void(Function &)>;
|
||||
PostOutlineCBTy PostOutlineCB;
|
||||
BasicBlock *EntryBB, *ExitBB;
|
||||
|
||||
/// Collect all blocks in between EntryBB and ExitBB in both the given
|
||||
/// vector and set.
|
||||
void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
|
||||
SmallVectorImpl<BasicBlock *> &BlockVector);
|
||||
};
|
||||
|
||||
/// Collection of regions that need to be outlined during finalization.
|
||||
|
|
|
@ -127,13 +127,16 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
|
|||
void OpenMPIRBuilder::initialize() { initializeTypes(M); }
|
||||
|
||||
void OpenMPIRBuilder::finalize() {
|
||||
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
|
||||
SmallVector<BasicBlock *, 32> Blocks;
|
||||
for (OutlineInfo &OI : OutlineInfos) {
|
||||
assert(!OI.Blocks.empty() &&
|
||||
"Outlined regions should have at least a single block!");
|
||||
BasicBlock *RegEntryBB = OI.Blocks.front();
|
||||
Function *OuterFn = RegEntryBB->getParent();
|
||||
ParallelRegionBlockSet.clear();
|
||||
Blocks.clear();
|
||||
OI.collectBlocks(ParallelRegionBlockSet, Blocks);
|
||||
|
||||
Function *OuterFn = OI.EntryBB->getParent();
|
||||
CodeExtractorAnalysisCache CEAC(*OuterFn);
|
||||
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
|
||||
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
|
||||
/* AggregateArgs */ false,
|
||||
/* BlockFrequencyInfo */ nullptr,
|
||||
/* BranchProbabilityInfo */ nullptr,
|
||||
|
@ -143,6 +146,8 @@ void OpenMPIRBuilder::finalize() {
|
|||
/* Suffix */ ".omp_par");
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
|
||||
LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
|
||||
<< " Exit: " << OI.ExitBB->getName() << "\n");
|
||||
assert(Extractor.isEligible() &&
|
||||
"Expected OpenMP outlining to be possible!");
|
||||
|
||||
|
@ -162,12 +167,12 @@ void OpenMPIRBuilder::finalize() {
|
|||
// made our own entry block after all.
|
||||
{
|
||||
BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
|
||||
assert(ArtificialEntry.getUniqueSuccessor() == RegEntryBB);
|
||||
assert(RegEntryBB->getUniquePredecessor() == &ArtificialEntry);
|
||||
RegEntryBB->moveBefore(&ArtificialEntry);
|
||||
assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
|
||||
assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
|
||||
OI.EntryBB->moveBefore(&ArtificialEntry);
|
||||
ArtificialEntry.eraseFromParent();
|
||||
}
|
||||
assert(&OutlinedFn->getEntryBlock() == RegEntryBB);
|
||||
assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
|
||||
assert(OutlinedFn && OutlinedFn->getNumUses() == 1);
|
||||
|
||||
// Run a user callback, e.g. to add attributes.
|
||||
|
@ -614,20 +619,12 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
|
|||
InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
|
||||
FiniCB(PreFiniIP);
|
||||
|
||||
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
|
||||
SmallVector<BasicBlock *, 32> Worklist;
|
||||
ParallelRegionBlockSet.insert(PRegEntryBB);
|
||||
ParallelRegionBlockSet.insert(PRegExitBB);
|
||||
OI.EntryBB = PRegEntryBB;
|
||||
OI.ExitBB = PRegExitBB;
|
||||
|
||||
// Collect all blocks in-between PRegEntryBB and PRegExitBB.
|
||||
Worklist.push_back(PRegEntryBB);
|
||||
while (!Worklist.empty()) {
|
||||
BasicBlock *BB = Worklist.pop_back_val();
|
||||
OI.Blocks.push_back(BB);
|
||||
for (BasicBlock *SuccBB : successors(BB))
|
||||
if (ParallelRegionBlockSet.insert(SuccBB).second)
|
||||
Worklist.push_back(SuccBB);
|
||||
}
|
||||
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
|
||||
SmallVector<BasicBlock *, 32> Blocks;
|
||||
OI.collectBlocks(ParallelRegionBlockSet, Blocks);
|
||||
|
||||
// Ensure a single exit node for the outlined region by creating one.
|
||||
// We might have multiple incoming edges to the exit now due to finalizations,
|
||||
|
@ -635,10 +632,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
|
|||
BasicBlock *PRegOutlinedExitBB = PRegExitBB;
|
||||
PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
|
||||
PRegOutlinedExitBB->setName("omp.par.outlined.exit");
|
||||
OI.Blocks.push_back(PRegOutlinedExitBB);
|
||||
Blocks.push_back(PRegOutlinedExitBB);
|
||||
|
||||
CodeExtractorAnalysisCache CEAC(*OuterFn);
|
||||
CodeExtractor Extractor(OI.Blocks, /* DominatorTree */ nullptr,
|
||||
CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
|
||||
/* AggregateArgs */ false,
|
||||
/* BlockFrequencyInfo */ nullptr,
|
||||
/* BranchProbabilityInfo */ nullptr,
|
||||
|
@ -694,7 +691,7 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
|
|||
|
||||
LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
|
||||
LLVM_DEBUG({
|
||||
for (auto *BB : OI.Blocks)
|
||||
for (auto *BB : Blocks)
|
||||
dbgs() << " PBR: " << BB->getName() << "\n";
|
||||
});
|
||||
|
||||
|
@ -1112,3 +1109,20 @@ void OpenMPIRBuilder::initializeTypes(Module &M) {
|
|||
VarName##Ptr = PointerType::getUnqual(T);
|
||||
#include "llvm/Frontend/OpenMP/OMPKinds.def"
|
||||
}
|
||||
|
||||
void OpenMPIRBuilder::OutlineInfo::collectBlocks(
|
||||
SmallPtrSetImpl<BasicBlock *> &BlockSet,
|
||||
SmallVectorImpl<BasicBlock *> &BlockVector) {
|
||||
SmallVector<BasicBlock *, 32> Worklist;
|
||||
BlockSet.insert(EntryBB);
|
||||
BlockSet.insert(ExitBB);
|
||||
|
||||
Worklist.push_back(EntryBB);
|
||||
while (!Worklist.empty()) {
|
||||
BasicBlock *BB = Worklist.pop_back_val();
|
||||
BlockVector.push_back(BB);
|
||||
for (BasicBlock *SuccBB : successors(BB))
|
||||
if (BlockSet.insert(SuccBB).second)
|
||||
Worklist.push_back(SuccBB);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue