[OpenMP] Use the OpenMPIRBuilder for `omp parallel`

This allows to use the OpenMPIRBuilder for parallel regions. Code was
extracted from D61953 and adapted to work with the new version (D70109).

All but one feature should be supported. An update of this patch will
provide test coverage and privatization other than shared.

Reviewed By: fghanim

Differential Revision: https://reviews.llvm.org/D70290
This commit is contained in:
Johannes Doerfert 2019-12-26 11:23:38 -06:00
parent 000c6a5038
commit 10fedd94b4
7 changed files with 274 additions and 95 deletions

View File

@ -21,6 +21,7 @@
#include "clang/AST/Stmt.h" #include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/PrettyStackTrace.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
using namespace clang; using namespace clang;
using namespace CodeGen; using namespace CodeGen;
using namespace llvm::omp; using namespace llvm::omp;
@ -1318,6 +1319,87 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
llvm::SmallVectorImpl<llvm::Value *> &) {} llvm::SmallVectorImpl<llvm::Value *> &) {}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
// Check if we have any if clause associated with the directive.
llvm::Value *IfCond = nullptr;
if (const auto *C = S.getSingleClause<OMPIfClause>())
IfCond = EmitScalarExpr(C->getCondition(),
/*IgnoreResultAssign=*/true);
llvm::Value *NumThreads = nullptr;
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
NumThreads = EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
ProcBindKind ProcBind = OMP_PROC_BIND_default;
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
ProcBind = ProcBindClause->getProcBindKind();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
// The cleanup callback that finalizes all variabels at the given location,
// thus calls destructors etc.
auto FiniCB = [this](InsertPointTy IP) {
CGBuilderTy::InsertPointGuard IPG(Builder);
assert(IP.getBlock()->end() != IP.getPoint() &&
"OpenMP IR Builder should cause terminated block!");
llvm::BasicBlock *IPBB = IP.getBlock();
llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
IPBB->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(IPBB);
CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
EmitBranchThroughCleanup(Dest);
};
// Privatization callback that performs appropriate action for
// shared/private/firstprivate/lastprivate/copyin/... variables.
//
// TODO: This defaults to shared right now.
auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::Value &Val, llvm::Value *&ReplVal) {
// The next line is appropriate only for variables (Val) with the
// data-sharing attribute "shared".
ReplVal = &Val;
return CodeGenIP;
};
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
auto BodyGenCB = [ParallelRegionBodyStmt,
this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::BasicBlock &ContinuationBB) {
auto OldAllocaIP = AllocaInsertPt;
AllocaInsertPt = &*AllocaIP.getPoint();
auto OldReturnBlock = ReturnBlock;
ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
CodeGenIPBBTI->removeFromParent();
Builder.SetInsertPoint(CodeGenIPBB);
EmitStmt(ParallelRegionBodyStmt);
Builder.Insert(CodeGenIPBBTI);
AllocaInsertPt = OldAllocaIP;
ReturnBlock = OldReturnBlock;
};
CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
FiniCB, IfCond, NumThreads,
ProcBind, S.hasCancel()));
return;
}
// Emit parallel region as a standalone region. // Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF); Action.Enter(CGF);
@ -4747,6 +4829,19 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
break; break;
} }
} }
if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
// TODO: This check is necessary as we only generate `omp parallel` through
// the OpenMPIRBuilder for now.
if (S.getCancelRegion() == OMPD_parallel) {
llvm::Value *IfCondition = nullptr;
if (IfCond)
IfCondition = EmitScalarExpr(IfCond,
/*IgnoreResultAssign=*/true);
return Builder.restoreIP(
OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
}
}
CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
S.getCancelRegion()); S.getCancelRegion());
} }

View File

@ -1,10 +1,10 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-enable-irbuilder -std=c++11 -include-pch %t -fsyntax-only -verify %s -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -triple x86_64-apple-darwin13.4.0 -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple x86_64-apple-darwin13.4.0 -emit-pch -o %t %s
@ -16,7 +16,7 @@
float flag; float flag;
int main (int argc, char **argv) { int main (int argc, char **argv) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( // ALL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
#pragma omp parallel #pragma omp parallel
{ {
#pragma omp cancel parallel if(flag) #pragma omp cancel parallel if(flag)
@ -24,15 +24,15 @@ int main (int argc, char **argv) {
#pragma omp barrier #pragma omp barrier
argv[0][0] += argc; argv[0][0] += argc;
} }
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( // ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
#pragma omp sections #pragma omp sections
{ {
#pragma omp cancel sections #pragma omp cancel sections
} }
// CHECK: call void @__kmpc_for_static_init_4( // ALL: call void @__kmpc_for_static_init_4(
// CHECK: call i32 @__kmpc_cancel( // ALL: call i32 @__kmpc_cancel(
// CHECK: call void @__kmpc_for_static_fini( // ALL: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_barrier(%struct.ident_t* // ALL: call void @__kmpc_barrier(%struct.ident_t*
#pragma omp sections #pragma omp sections
{ {
#pragma omp cancel sections #pragma omp cancel sections
@ -41,53 +41,53 @@ int main (int argc, char **argv) {
#pragma omp cancel sections #pragma omp cancel sections
} }
} }
// CHECK: call void @__kmpc_for_static_init_4( // ALL: call void @__kmpc_for_static_init_4(
// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) // ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// CHECK: [[EXIT]] // ALL: [[EXIT]]
// CHECK: br label // ALL: br label
// CHECK: [[CONTINUE]] // ALL: [[CONTINUE]]
// CHECK: br label // ALL: br label
// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3) // ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 3)
// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// CHECK: [[EXIT]] // ALL: [[EXIT]]
// CHECK: br label // ALL: br label
// CHECK: [[CONTINUE]] // ALL: [[CONTINUE]]
// CHECK: br label // ALL: br label
// CHECK: call void @__kmpc_for_static_fini( // ALL: call void @__kmpc_for_static_fini(
#pragma omp for #pragma omp for
for (int i = 0; i < argc; ++i) { for (int i = 0; i < argc; ++i) {
#pragma omp cancel for if(cancel: flag) #pragma omp cancel for if(cancel: flag)
} }
// CHECK: call void @__kmpc_for_static_init_4( // ALL: call void @__kmpc_for_static_init_4(
// CHECK: [[FLAG:%.+]] = load float, float* @{{.+}}, // ALL: [[FLAG:%.+]] = load float, float* @{{.+}},
// CHECK: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00 // ALL: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
// CHECK: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]] // ALL: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
// CHECK: [[THEN]] // ALL: [[THEN]]
// CHECK: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2) // ALL: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 [[GTID]], i32 2)
// CHECK: [[CMP:%.+]] = icmp ne i32 [[RES]], 0 // ALL: [[CMP:%.+]] = icmp ne i32 [[RES]], 0
// CHECK: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]] // ALL: br i1 [[CMP]], label %[[EXIT:[^,].+]], label %[[CONTINUE:.+]]
// CHECK: [[EXIT]] // ALL: [[EXIT]]
// CHECK: br label // ALL: br label
// CHECK: [[CONTINUE]] // ALL: [[CONTINUE]]
// CHECK: br label // ALL: br label
// CHECK: [[ELSE]] // ALL: [[ELSE]]
// CHECK: br label // ALL: br label
// CHECK: call void @__kmpc_for_static_fini( // ALL: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_barrier(%struct.ident_t* // ALL: call void @__kmpc_barrier(%struct.ident_t*
#pragma omp task #pragma omp task
{ {
#pragma omp cancel taskgroup #pragma omp cancel taskgroup
} }
// CHECK: call i8* @__kmpc_omp_task_alloc( // ALL: call i8* @__kmpc_omp_task_alloc(
// CHECK: call i32 @__kmpc_omp_task( // ALL: call i32 @__kmpc_omp_task(
#pragma omp parallel sections #pragma omp parallel sections
{ {
#pragma omp cancel sections #pragma omp cancel sections
} }
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( // ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
#pragma omp parallel sections #pragma omp parallel sections
{ {
#pragma omp cancel sections #pragma omp cancel sections
@ -96,14 +96,14 @@ for (int i = 0; i < argc; ++i) {
#pragma omp cancel sections #pragma omp cancel sections
} }
} }
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( // ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
int r = 0; int r = 0;
#pragma omp parallel for reduction(+: r) #pragma omp parallel for reduction(+: r)
for (int i = 0; i < argc; ++i) { for (int i = 0; i < argc; ++i) {
#pragma omp cancel for #pragma omp cancel for
r += i; r += i;
} }
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call( // ALL: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
return argc; return argc;
} }
@ -173,4 +173,29 @@ for (int i = 0; i < argc; ++i) {
// CHECK: call void @__kmpc_for_static_fini( // CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void // CHECK: ret void
// IRBUILDER: define internal void @main
// IRBUILDER: [[RETURN:omp.par.exit[^:]*]]
// IRBUILDER-NEXT: ret void
// IRBUILDER: [[FLAG:%.+]] = load float, float* @{{.+}},
// IRBUILDER: [[BOOL:%.+]] = fcmp une float [[FLAG]], 0.000000e+00
// IRBUILDER: br i1 [[BOOL]], label %[[THEN:[^,]+]], label %[[ELSE:[^,]+]]
// IRBUILDER: [[ELSE]]
// IRBUILDER-NEXT: br label %[[ELSE2:.*]]
// IRBUILDER: [[ELSE2]]
// The barrier directive should now call __kmpc_cancel_barrier
// IRBUILDER: call i32 @__kmpc_cancel_barrier(%struct.ident_t*
// IRBUILDER: br label
// IRBUILDER: [[THEN]]
// IRBUILDER: [[RES:%.+]] = call i32 @__kmpc_cancel(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1)
// IRBUILDER: [[CMP:%.+]] = icmp eq i32 [[RES]], 0
// IRBUILDER: br i1 [[CMP]], label %[[CONTINUE:[^,].+]], label %[[EXIT:.+]]
// IRBUILDER: [[EXIT]]
// IRBUILDER: br label %[[EXIT2:.+]]
// IRBUILDER: [[EXIT2]]
// IRBUILDER: br label %[[RETURN]]
// IRBUILDER: [[CONTINUE]]
// IRBUILDER: br label %[[ELSE:.+]]
#endif #endif

View File

@ -1,22 +1,31 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s // RUN: %clang_cc1 -verify -fopenmp -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,CHECK-DEBUG %s
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=ALL-DEBUG,IRBUILDER-DEBUG %s
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics // expected-no-diagnostics
#ifndef HEADER #ifndef HEADER
#define HEADER #define HEADER
// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // ALL-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // ALL-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // ALL-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } // CHECK-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];1;;\00" // CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+22]];1;;\00"
// CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+8]];1;;\00" // CHECK-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain;[[@LINE+11]];1;;\00"
// IRBUILDER-DEBUG-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// IRBUILDER-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+19]];0;;\00"
// IRBUILDER-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;tmain<char **>;[[@LINE+8]];0;;\00"
template <class T> template <class T>
void foo(T argc) {} void foo(T argc) {}
@ -39,65 +48,74 @@ int main (int argc, char **argv) {
return tmain(argv); return tmain(argv);
} }
// CHECK-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv) // ALL-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv)
// CHECK: store i32 %argc, i32* [[ARGC_ADDR:%.+]], // ALL: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
// CHECK: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]], // ALL: [[VLA:%.+]] = alloca i32, i{{[0-9]+}} [[VLA_SIZE:%[^,]+]],
// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{[0-9]+}}, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{[0-9]+}} [[VLA_SIZE]], i32* [[VLA]]) // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{[0-9]+}}, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i{{[0-9]+}} [[VLA_SIZE]], i32* [[VLA]])
// CHECK-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]])
// CHECK-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) // ALL: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
// CHECK: ret i32 // ALL-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
// CHECK-NEXT: } // ALL: ret i32
// CHECK-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv) // ALL-NEXT: }
// ALL-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv)
// CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t // CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t
// CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8*
// CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8*
// CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false) // CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[KMPC_LOC_VOIDPTR]], i8* align 8 [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i1 false)
// CHECK-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]], // ALL-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]],
// CHECK-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]], // ALL-DEBUG: [[VLA:%.+]] = alloca i32, i64 [[VLA_SIZE:%[^,]+]],
// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4
// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
// CHECK-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]]) // CHECK-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[VLA_SIZE]], i32* [[VLA]])
// CHECK-DEBUG-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // IRBUILDER-DEBUG: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[VLA]])
// CHECK-DEBUG-NEXT: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) // ALL-DEBUG: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}}
// CHECK-DEBUG: ret i32 // ALL-DEBUG: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]])
// CHECK-DEBUG-NEXT: } // ALL-DEBUG: ret i32
// ALL-DEBUG-NEXT: }
// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i{{[0-9]+}}{{.*}} [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-SAME: #[[FN_ATTRS:[0-9]+]] // CHECK-SAME: #[[FN_ATTRS:[0-9]+]]
// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]])
// IRBUILDER-SAME: #[[FN_ATTRS:[0-9]+]]
// CHECK: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]], // CHECK: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
// CHECK: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]] // CHECK: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
// CHECK: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1 // ALL: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i{{[0-9]+}} 1
// CHECK-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]] // ALL-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
// CHECK-NEXT: invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]]) // CHECK-NEXT: invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]])
// IRBUILDER: call {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[VLA_ELEM]])
// CHECK: ret void // CHECK: ret void
// CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK: call {{.*}}void @{{.+terminate.*|abort}}(
// CHECK-NEXT: unreachable // CHECK-NEXT: unreachable
// CHECK-NEXT: } // CHECK-NEXT: }
// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]] // CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]]
// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i32* [[VLA_REF:%[^)]+]])
// IRBUILDER-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]]
// CHECK-DEBUG: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]], // CHECK-DEBUG: store i32* [[VLA_ADDR]], i32** [[VLA_PTR_ADDR:%.+]],
// CHECK-DEBUG: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]] // CHECK-DEBUG: [[VLA_REF:%.+]] = load i32*, i32** [[VLA_PTR_ADDR]]
// CHECK-DEBUG: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1 // ALL-DEBUG: [[VLA_ELEM_REF:%.+]] = getelementptr inbounds i32, i32* [[VLA_REF]], i64 1
// CHECK-DEBUG-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]] // ALL-DEBUG-NEXT: [[VLA_ELEM:%.+]] = load i32, i32* [[VLA_ELEM_REF]]
// CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]]) // CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]])
// IRBUILDER-DEBUG-NEXT: call void [[FOO:@.+foo.+]](i32 [[VLA_ELEM]])
// CHECK-DEBUG: ret void // CHECK-DEBUG: ret void
// CHECK-DEBUG: call void @{{.+terminate.*|abort}}( // CHECK-DEBUG: call void @{{.+terminate.*|abort}}(
// CHECK-DEBUG-NEXT: unreachable // CHECK-DEBUG-NEXT: unreachable
// CHECK-DEBUG-NEXT: } // CHECK-DEBUG-NEXT: }
// CHECK-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc) // ALL-DAG: define linkonce_odr {{.*}}void [[FOO]]({{i32[ ]?[a-z]*}} %argc)
// CHECK-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) // ALL-DAG: declare !callback ![[cbid:[0-9]+]] {{.*}}void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc) // ALL-DEBUG-DAG: define linkonce_odr void [[FOO]](i32 %argc)
// CHECK-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) // ALL-DEBUG-DAG: declare !callback ![[cbid:[0-9]+]] void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]]) // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 [[VLA_SIZE:%.+]], i32* {{.+}} [[VLA_ADDR:%[^)]+]])
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]] // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]
// CHECK: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc) // ALL: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc)
// CHECK: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], // ALL: store i8** %argc, i8*** [[ARGC_ADDR:%.+]],
// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}}) // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i{{64|32}})* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i{{64|32}} %{{.+}})
// CHECK-NEXT: ret i32 0 // IRBUILDER: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC_2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
// CHECK-NEXT: } // ALL: ret i32 0
// CHECK-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) // ALL-NEXT: }
// ALL-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc)
// CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t // CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %struct.ident_t
// CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[LOC_2_ADDR]] to i8*
// CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %struct.ident_t* [[DEF_LOC_2]] to i8*
@ -106,35 +124,40 @@ int main (int argc, char **argv) {
// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %struct.ident_t, %struct.ident_t* [[LOC_2_ADDR]], i32 0, i32 4
// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]]
// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}}) // CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[LOC_2_ADDR]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
// CHECK-DEBUG-NEXT: ret i32 0 // IRBUILDER-DEBUG: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.*}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 %{{.+}})
// CHECK-DEBUG-NEXT: } // ALL-DEBUG: ret i32 0
// ALL-DEBUG-NEXT: }
// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}}) // CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}})
// IRBUILDER: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i{{64|32}}{{.*}} %{{.+}})
// CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
// CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
// CHECK: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // ALL: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// CHECK-NEXT: invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]]) // CHECK-NEXT: invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// IRBUILDER-NEXT: call {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// CHECK: ret void // CHECK: ret void
// CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK: call {{.*}}void @{{.+terminate.*|abort}}(
// CHECK-NEXT: unreachable // CHECK-NEXT: unreachable
// CHECK-NEXT: } // CHECK-NEXT: }
// CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}}) // CHECK-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}})
// IRBUILDER-DEBUG: define internal void [[OMP_OUTLINED_DEBUG:@.+]](i32* noalias %{{.*}}, i32* noalias %{{.*}}, i8*** [[ARGC_REF:%.*]], i64 %{{.+}})
// CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]],
// CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]]
// CHECK-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // ALL-DEBUG: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]]
// CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]]) // CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// IRBUILDER-DEBUG-NEXT: call void [[FOO1:@.+foo.+]](i8** [[ARGC]])
// CHECK-DEBUG: ret void // CHECK-DEBUG: ret void
// CHECK-DEBUG: call void @{{.+terminate.*|abort}}( // CHECK-DEBUG: call void @{{.+terminate.*|abort}}(
// CHECK-DEBUG-NEXT: unreachable // CHECK-DEBUG-NEXT: unreachable
// CHECK-DEBUG-NEXT: } // CHECK-DEBUG-NEXT: }
// CHECK: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc) // ALL: define linkonce_odr {{.*}}void [[FOO1]](i8** %argc)
// CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc) // CHECK-DEBUG-DAG: define linkonce_odr void [[FOO1]](i8** %argc)
// CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}}) // CHECK-DEBUG-DAG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i64 %{{.+}})
// CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg // CHECK-DEBUG-DAG: call void [[OMP_OUTLINED_DEBUG]]({{[^)]+}}){{[^,]*}}, !dbg
// CHECK: attributes #[[FN_ATTRS]] = {{.+}} nounwind // ALL: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// CHECK-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind // ALL-DEBUG: attributes #[[FN_ATTRS]] = {{.+}} nounwind
// CHECK: ![[cbid]] = !{![[cbidb:[0-9]+]]} // ALL: ![[cbid]] = !{![[cbidb:[0-9]+]]}
// CHECK: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true} // ALL: ![[cbidb]] = !{i64 2, i64 -1, i64 -1, i1 true}
#endif #endif

View File

@ -205,6 +205,9 @@ __OMP_ATTRS_SET(GetterAttrs,
#define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \
OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets)
__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)),
AttributeSet(), {})
__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {})
__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {})

View File

@ -65,7 +65,7 @@ void llvm::omp::types::initializeTypes(Module &M) {
#define OMP_TYPE(VarName, InitValue) VarName = InitValue; #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \ VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
VarName##Ptr = PointerType::getUnqual(T); VarName##Ptr = PointerType::getUnqual(VarName);
#define OMP_STRUCT_TYPE(VarName, StructName, ...) \ #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
T = M.getTypeByName(StructName); \ T = M.getTypeByName(StructName); \
if (!T) \ if (!T) \

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h" #include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h" #include "llvm/Support/Error.h"
@ -501,10 +502,21 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
dbgs() << " PBR: " << BB->getName() << "\n"; dbgs() << " PBR: " << BB->getName() << "\n";
}); });
// Add some known attributes to the outlined function.
Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
OutlinedFn->addParamAttr(0, Attribute::NoAlias);
OutlinedFn->addParamAttr(1, Attribute::NoAlias);
OutlinedFn->addFnAttr(Attribute::NoUnwind);
OutlinedFn->addFnAttr(Attribute::NoRecurse);
LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n"); LLVM_DEBUG(dbgs() << "After outlining: " << *UI->getFunction() << "\n");
LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n"); LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
// For compability with the clang CG we move the outlined function after the
// one with the parallel region.
OutlinedFn->removeFromParent();
M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);
// Remove the artificial entry introduced by the extractor right away, we // Remove the artificial entry introduced by the extractor right away, we
// made our own entry block after all. // made our own entry block after all.
{ {
@ -535,6 +547,23 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::CreateParallel(
RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end()); RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());
FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call); FunctionCallee RTLFn = getOrCreateRuntimeFunction(OMPRTL___kmpc_fork_call);
if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
llvm::LLVMContext &Ctx = F->getContext();
MDBuilder MDB(Ctx);
// Annotate the callback behavior of the __kmpc_fork_call:
// - The callback callee is argument number 2 (microtask).
// - The first two arguments of the callback callee are unknown (-1).
// - All variadic arguments to the __kmpc_fork_call are passed to the
// callback callee.
F->addMetadata(
llvm::LLVMContext::MD_callback,
*llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2, {-1, -1},
/* VarArgsArePassed */ true)}));
}
}
Builder.CreateCall(RTLFn, RealArgs); Builder.CreateCall(RTLFn, RealArgs);
LLVM_DEBUG(dbgs() << "With fork_call placed: " LLVM_DEBUG(dbgs() << "With fork_call placed: "

View File

@ -376,6 +376,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
Function *OutlinedFn = PrivAI->getFunction(); Function *OutlinedFn = PrivAI->getFunction();
EXPECT_NE(F, OutlinedFn); EXPECT_NE(F, OutlinedFn);
EXPECT_FALSE(verifyModule(*M)); EXPECT_FALSE(verifyModule(*M));
EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoUnwind));
EXPECT_TRUE(OutlinedFn->hasFnAttribute(Attribute::NoRecurse));
EXPECT_TRUE(OutlinedFn->hasParamAttribute(0, Attribute::NoAlias));
EXPECT_TRUE(OutlinedFn->hasParamAttribute(1, Attribute::NoAlias));
EXPECT_TRUE(OutlinedFn->hasInternalLinkage()); EXPECT_TRUE(OutlinedFn->hasInternalLinkage());
EXPECT_EQ(OutlinedFn->arg_size(), 3U); EXPECT_EQ(OutlinedFn->arg_size(), 3U);