[OpenMP] Use the OpenMPIRBuilder for `omp parallel`

This allows to use the OpenMPIRBuilder for parallel regions. Code was
extracted from D61953 and adapted to work with the new version (D70109).

All but one feature should be supported. An update of this patch will
provide test coverage and privatization other than shared.

Reviewed By: fghanim

Differential Revision: https://reviews.llvm.org/D70290
This commit is contained in:
Johannes Doerfert 2019-12-26 11:23:38 -06:00
parent 000c6a5038
commit 10fedd94b4
7 changed files with 274 additions and 95 deletions

View File

@ -21,6 +21,7 @@
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
@ -1318,6 +1319,87 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
llvm::SmallVectorImpl<llvm::Value *> &) {}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
// Check if we have any if clause associated with the directive.
llvm::Value *IfCond = nullptr;
if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = EmitScalarExpr(C->getCondition(),
/*IgnoreResultAssign=*/true);
llvm::Value *NumThreads = nullptr;
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
ProcBindKind ProcBind = OMP_PROC_BIND_default;
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
ProcBind = ProcBindClause->getProcBindKind();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
// The cleanup callback that finalizes all variabels at the given location,
// thus calls destructors etc.
auto FiniCB = [this](InsertPointTy IP) {
CGBuilderTy::InsertPointGuard IPG(Builder);
assert(IP.getBlock()->end() != IP.getPoint() &&
"OpenMP IR Builder should cause terminated block!");
llvm::BasicBlock *IPBB = IP.getBlock();
llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
IPBB->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(IPBB);
CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
EmitBranchThroughCleanup(Dest);
};
// Privatization callback that performs appropriate action for
// shared/private/firstprivate/lastprivate/copyin/... variables.
//
// TODO: This defaults to shared right now.
auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::Value &Val, llvm::Value *&ReplVal) {
// The next line is appropriate only for variables (Val) with the
// data-sharing attribute "shared".
ReplVal = &Val;
return CodeGenIP;
};
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
auto BodyGenCB = [ParallelRegionBodyStmt,
this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::BasicBlock &ContinuationBB) {
auto OldAllocaIP = AllocaInsertPt;
AllocaInsertPt = &*AllocaIP.getPoint();
auto OldReturnBlock = ReturnBlock;
ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
CodeGenIPBBTI->removeFromParent();
Builder.SetInsertPoint(CodeGenIPBB);
EmitStmt(ParallelRegionBodyStmt);
Builder.Insert(CodeGenIPBBTI);
AllocaInsertPt = OldAllocaIP;
ReturnBlock = OldReturnBlock;
};
CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
FiniCB, IfCond, NumThreads,
ProcBind, S.hasCancel()));
return;
}
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
@ -4747,6 +4829,19 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
break;
}
}
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
// TODO: This check is necessary as we only generate `omp parallel` through
// the OpenMPIRBuilder for now.
if (S.getCancelRegion() == OMPD_parallel) {
llvm::Value *IfCondition = nullptr;
if (IfCond)
IfCondition = EmitScalarExpr(IfCond,
/*IgnoreResultAssign=*/true);
return Builder.restoreIP(
OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
}
}
CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
S.getCancelRegion());
}

View File

@ -1,10 +1,10 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
@ -16,7 +16,7 @@
float flag;
int main (int argc, char **argv) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
#pragma omp parallel
{
#pragma omp cancel parallel if(flag)
@ -24,15 +24,15 @@ int main (int argc, char **argv) {
#pragma omp barrier
argv[0][0] += argc;
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
#pragma omp sections
{
#pragma omp cancel sections
}
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call i32 @__kmpc_cancel(
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_barrier(%struct.ident_t*
// ALL: call void @__kmpc_for_static_init_4(
// ALL: call i32 @__kmpc_cancel(
// ALL: call void @__kmpc_for_static_fini(
// ALL: call void @__kmpc_barrier(%struct.ident_t*
#pragma omp sections
{
#pragma omp cancel sections
@ -41,53 +41,53 @@ int main (int argc, char **argv) {
#pragma omp cancel sections
}
}
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// CHECK: [[EXIT]]
// CHECK: br label
// CHECK: [[CONTINUE]]
// CHECK: br label
// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// CHECK: [[EXIT]]
// CHECK: br label
// CHECK: [[CONTINUE]]
// CHECK: br label
// CHECK: call void @__kmpc_for_static_fini(
// ALL: call void @__kmpc_for_static_init_4(
// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// ALL: [[EXIT]]
// ALL: br label
// ALL: [[CONTINUE]]
// ALL: br label
// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// ALL: [[EXIT]]
// ALL: br label
// ALL: [[CONTINUE]]
// ALL: br label
// ALL: call void @__kmpc_for_static_fini(
#pragma omp for
for (int i = 0; i < argc; ++i) {
#pragma omp cancel for if(cancel: flag)
}
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: [[FLAG:%.+]] = load float, float* @{{.+}},
// CHECK: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
// CHECK: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
// CHECK: [[THEN]]
// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// CHECK: [[EXIT]]
// CHECK: br label
// CHECK: [[CONTINUE]]
// CHECK: br label
// CHECK: [[ELSE]]
// CHECK: br label
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_barrier(%struct.ident_t*
// ALL: call void @__kmpc_for_static_init_4(
// ALL: [[FLAG:%.+]] = load float, float* @{{.+}},
// ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
// ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
// ALL: [[THEN]]
// ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
// ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// ALL: [[EXIT]]
// ALL: br label
// ALL: [[CONTINUE]]
// ALL: br label
// ALL: [[ELSE]]
// ALL: br label
// ALL: call void @__kmpc_for_static_fini(
// ALL: call void @__kmpc_barrier(%struct.ident_t*
#pragma omp task
{
#pragma omp cancel taskgroup
}
// CHECK: call i8* @__kmpc_omp_task_alloc(
// CHECK: call i32 @__kmpc_omp_task(
// ALL: call i8* @__kmpc_omp_task_alloc(
// ALL: call i32 @__kmpc_omp_task(
#pragma omp parallel sections
{
#pragma omp cancel sections
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
#pragma omp parallel sections
{
#pragma omp cancel sections
@ -96,14 +96,14 @@ for (int i = 0; i < argc; ++i) {
#pragma omp cancel sections
}
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
int r = 0;
#pragma omp parallel for reduction(+: r)
for (int i = 0; i < argc; ++i) {
#pragma omp cancel for
r += i;
}
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
// ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
return argc;
}
@ -173,4 +173,29 @@ for (int i = 0; i < argc; ++i) {
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void
// IRBUILDER: define internal void @main
// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
// IRBUILDER-NEXT: ret void
// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
// IRBUILDER: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
// IRBUILDER: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
// IRBUILDER: [[ELSE]]
// IRBUILDER-NEXT: br label %[[ELSE2:.*]]
// IRBUILDER: [[ELSE2]]
// The barrier directive should now call __kmpc_cancel_barrier
// IRBUILDER: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
// IRBUILDER: br label
// IRBUILDER: [[THEN]]
// IRBUILDER: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
// IRBUILDER: [[EXIT]]
// IRBUILDER: br label %[[EXIT2:.+]]
// IRBUILDER: [[EXIT2]]
// IRBUILDER: br label %[[RETURN]]
// IRBUILDER: [[CONTINUE]]
// IRBUILDER: br label %[[ELSE:.+]]
#endif

View File

@ -1,22 +1,31 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,CHECK-DEBUG %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// ALL-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// ALL-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// ALL-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];1;;\00"
// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+8]];1;;\00"
// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+22]];1;;\00"
// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+11]];1;;\00"
// IRBUILDER-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// IRBUILDER-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];0;;\00"
// IRBUILDER-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain<char **>;[[@LINE+8]];0;;\00"
template <class T>
void foo(T argc) {}
@ -39,65 +48,74 @@ int main (int argc, char **argv) {
return tmain(argv);
}
// CHECK-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
// CHECK: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
// CHECK: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]],
// ALL-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
// ALL: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
// ALL: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]],
// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{[0-9]+}}, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{[0-9]+}} [[VLA_SIZE]], i32* [[VLA]])
// CHECK-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
// CHECK-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
// CHECK: ret i32
// CHECK-NEXT: }
// CHECK-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv)
// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]])
// ALL: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
// ALL-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
// ALL: ret i32
// ALL-NEXT: }
// ALL-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv)
// CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t
// CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8*
// CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8*
// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false)
// CHECK-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
// CHECK-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]],
// ALL-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
// ALL-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]],
// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4
// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
// CHECK-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]])
// CHECK-DEBUG-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
// CHECK-DEBUG-NEXT: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
// CHECK-DEBUG: ret i32
// CHECK-DEBUG-NEXT: }
// IRBUILDER-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]])
// ALL-DEBUG: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
// ALL-DEBUG: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
// ALL-DEBUG: ret i32
// ALL-DEBUG-NEXT: }
// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-SAME: #[[FN_ATTRS:[0-9]+]]
// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]])
// IRBUILDER-SAME: #[[FN_ATTRS:[0-9]+]]
// CHECK: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
// CHECK: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
// CHECK: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1
// CHECK-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
// ALL: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1
// ALL-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
// CHECK-NEXT: invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]])
// IRBUILDER: call {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]])
// CHECK: ret void
// CHECK: call {{.*}}void @{{.+terminate.*|abort}}(
// CHECK-NEXT: unreachable
// CHECK-NEXT: }
// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]]
// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]])
// IRBUILDER-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]]
// CHECK-DEBUG: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
// CHECK-DEBUG: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
// CHECK-DEBUG: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1
// CHECK-DEBUG-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
// ALL-DEBUG: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1
// ALL-DEBUG-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
// CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]])
// IRBUILDER-DEBUG-NEXT: call void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]])
// CHECK-DEBUG: ret void
// CHECK-DEBUG: call void @{{.+terminate.*|abort}}(
// CHECK-DEBUG-NEXT: unreachable
// CHECK-DEBUG-NEXT: }
// CHECK-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
// CHECK-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
// CHECK-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
// ALL-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
// ALL-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
// ALL-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
// ALL-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]
// CHECK: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
// CHECK: store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
// ALL: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
// ALL: store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
// CHECK-NEXT: ret i32 0
// CHECK-NEXT: }
// CHECK-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc)
// IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
// ALL: ret i32 0
// ALL-NEXT: }
// ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc)
// CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t
// CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8*
// CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8*
@ -106,35 +124,40 @@ int main (int argc, char **argv) {
// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4
// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
// CHECK-DEBUG-NEXT: ret i32 0
// CHECK-DEBUG-NEXT: }
// IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
// ALL-DEBUG: ret i32 0
// ALL-DEBUG-NEXT: }
// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}})
// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}})
// CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
// CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
// CHECK: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// ALL: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// CHECK-NEXT: invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// IRBUILDER-NEXT: call {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// CHECK: ret void
// CHECK: call {{.*}}void @{{.+terminate.*|abort}}(
// CHECK-NEXT: unreachable
// CHECK-NEXT: }
// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}})
// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i64 %{{.+}})
// CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
// CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
// CHECK-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// ALL-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// IRBUILDER-DEBUG-NEXT: call void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// CHECK-DEBUG: ret void
// CHECK-DEBUG: call void @{{.+terminate.*|abort}}(
// CHECK-DEBUG-NEXT: unreachable
// CHECK-DEBUG-NEXT: }
// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
// ALL: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc)
// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}})
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg
// CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// CHECK: ![[cbid]] = !{![[cbidb:[0-9]+]]}
// CHECK: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true}
// ALL: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// ALL-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// ALL: ![[cbid]] = !{![[cbidb:[0-9]+]]}
// ALL: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true}
#endif

View File

@ -205,6 +205,9 @@ __OMP_ATTRS_SET(GetterAttrs,
#define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \
OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets)
__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
AttributeSet(), {})
__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})

View File

@ -65,7 +65,7 @@ void llvm::omp::types::initializeTypes(Module &M) {
#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
VarName##Ptr = PointerType::getUnqual(T);
VarName##Ptr = PointerType::getUnqual(VarName);
#define OMP_STRUCT_TYPE(VarName, StructName, ...) \
T = M.getTypeByName(StructName); \
if (!T) \

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
@ -501,10 +502,21 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
dbgs() << " PBR: " << BB->getName() << "\n";
});
// Add some known attributes to the outlined function.
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
OutlinedFn->addParamAttr(0, Attribute::NoAlias);
OutlinedFn->addParamAttr(1, Attribute::NoAlias);
OutlinedFn->addFnAttr(Attribute::NoUnwind);
OutlinedFn->addFnAttr(Attribute::NoRecurse);
LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
// For compability with the clang CG we move the outlined function after the
// one with the parallel region.
OutlinedFn->removeFromParent();
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
// Remove the artificial entry introduced by the extractor right away, we
// made our own entry block after all.
{
@ -535,6 +547,23 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
llvm::LLVMContext &Ctx = F->getContext();
MDBuilder MDB(Ctx);
// Annotate the callback behavior of the __kmpc_fork_call:
// - The callback callee is argument number 2 (microtask).
// - The first two arguments of the callback callee are unknown (-1).
// - All variadic arguments to the __kmpc_fork_call are passed to the
// callback callee.
F->addMetadata(
llvm::LLVMContext::MD_callback,
*llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2, {-1, -1},
/* VarArgsArePassed */ true)}));
}
}
Builder.CreateCall(RTLFn, RealArgs);
LLVM_DEBUG(dbgs() << "With fork_call placed: "

View File

@ -376,6 +376,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
Function *OutlinedFn = PrivAI->getFunction();
EXPECT_NE(F, OutlinedFn);
EXPECT_FALSE(verifyModule(*M));
EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
EXPECT_EQ(OutlinedFn->arg_size(), 3U);