forked from OSchip/llvm-project
Add code generation for teams directive inside target region
llvm-svn: 262652
This commit is contained in:
parent
e5960ceb6b
commit
430d8ecc55
|
@ -537,6 +537,12 @@ enum OpenMPRTLFunction {
|
|||
// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
|
||||
// kmp_int32 cncl_kind);
|
||||
OMPRTL__kmpc_cancel,
|
||||
// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
|
||||
// kmp_int32 num_teams, kmp_int32 thread_limit);
|
||||
OMPRTL__kmpc_push_num_teams,
|
||||
/// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
|
||||
/// kmpc_micro microtask, ...);
|
||||
OMPRTL__kmpc_fork_teams,
|
||||
|
||||
//
|
||||
// Offloading related calls
|
||||
|
@ -625,7 +631,7 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
|
|||
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
|
||||
}
|
||||
|
||||
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
|
||||
llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
|
||||
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
|
||||
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
|
||||
assert(ThreadIDVar->getType()->isPointerType() &&
|
||||
|
@ -1205,6 +1211,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
|
|||
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__kmpc_push_num_teams: {
|
||||
// Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
|
||||
// kmp_int32 num_teams, kmp_int32 num_threads)
|
||||
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
|
||||
CGM.Int32Ty};
|
||||
llvm::FunctionType *FnTy =
|
||||
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
||||
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__kmpc_fork_teams: {
|
||||
// Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
|
||||
// microtask, ...);
|
||||
llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
|
||||
getKmpc_MicroPointerTy()};
|
||||
llvm::FunctionType *FnTy =
|
||||
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
|
||||
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__tgt_target: {
|
||||
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
|
||||
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
|
||||
|
@ -4604,3 +4630,43 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
|
|||
// compilation unit.
|
||||
return createOffloadingBinaryDescriptorRegistration();
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D,
|
||||
SourceLocation Loc,
|
||||
llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> CapturedVars) {
|
||||
if (!CGF.HaveInsertPoint())
|
||||
return;
|
||||
|
||||
auto *RTLoc = emitUpdateLocation(CGF, Loc);
|
||||
CodeGenFunction::RunCleanupsScope Scope(CGF);
|
||||
|
||||
// Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
|
||||
llvm::Value *Args[] = {
|
||||
RTLoc,
|
||||
CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
|
||||
CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
|
||||
llvm::SmallVector<llvm::Value *, 16> RealArgs;
|
||||
RealArgs.append(std::begin(Args), std::end(Args));
|
||||
RealArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
|
||||
auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
|
||||
CGF.EmitRuntimeCall(RTLFn, RealArgs);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
|
||||
llvm::Value *NumTeams,
|
||||
llvm::Value *ThreadLimit,
|
||||
SourceLocation Loc) {
|
||||
if (!CGF.HaveInsertPoint())
|
||||
return;
|
||||
|
||||
auto *RTLoc = emitUpdateLocation(CGF, Loc);
|
||||
|
||||
// Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
|
||||
llvm::Value *PushNumTeamsArgs[] = {
|
||||
RTLoc, getThreadID(CGF, Loc), NumTeams, ThreadLimit};
|
||||
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
|
||||
PushNumTeamsArgs);
|
||||
}
|
||||
|
|
|
@ -372,7 +372,7 @@ public:
|
|||
/// \param InnermostKind Kind of innermost directive (for simple directives it
|
||||
/// is a directive itself, for combined - its innermost directive).
|
||||
/// \param CodeGen Code generation sequence for the \a D directive.
|
||||
virtual llvm::Value *emitParallelOutlinedFunction(
|
||||
virtual llvm::Value *emitParallelOrTeamsOutlinedFunction(
|
||||
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
|
||||
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
|
||||
|
||||
|
@ -782,6 +782,28 @@ public:
|
|||
/// was emitted in the current module and return the function that registers
|
||||
/// it.
|
||||
virtual llvm::Function *emitRegistrationFunction();
|
||||
|
||||
/// \brief Emits code for teams call of the \a OutlinedFn with
|
||||
/// variables captured in a record which address is stored in \a
|
||||
/// CapturedStruct.
|
||||
/// \param OutlinedFn Outlined function to be run by team masters. Type of
|
||||
/// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
|
||||
/// \param CapturedVars A pointer to the record with the references to
|
||||
/// variables used in \a OutlinedFn function.
|
||||
///
|
||||
virtual void emitTeamsCall(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D,
|
||||
SourceLocation Loc, llvm::Value *OutlinedFn,
|
||||
ArrayRef<llvm::Value *> CapturedVars);
|
||||
|
||||
/// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
|
||||
/// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code
|
||||
/// for num_teams clause.
|
||||
/// \param NumTeams An integer value of teams.
|
||||
/// \param ThreadsLimit An integer value of threads.
|
||||
virtual void emitNumTeamsClause(CodeGenFunction &CGF, llvm::Value *NumTeams,
|
||||
llvm::Value *ThreadLimit, SourceLocation Loc);
|
||||
|
||||
};
|
||||
|
||||
} // namespace CodeGen
|
||||
|
|
|
@ -983,8 +983,9 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
|
|||
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
||||
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
||||
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
|
||||
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
|
||||
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
|
||||
emitParallelOrTeamsOutlinedFunction(S,
|
||||
*CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
|
||||
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
|
||||
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
|
||||
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
|
||||
|
@ -2716,12 +2717,48 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
|
|||
CapturedVars);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
|
||||
OMPLexicalScope Scope(*this, S);
|
||||
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &S,
|
||||
OpenMPDirectiveKind InnermostKind,
|
||||
const RegionCodeGenTy &CodeGen) {
|
||||
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
||||
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
||||
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
|
||||
emitParallelOrTeamsOutlinedFunction(S,
|
||||
*CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
|
||||
|
||||
// FIXME: We should fork teams here instead of just emit the statement.
|
||||
EmitStmt(CS.getCapturedStmt());
|
||||
const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
|
||||
const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
|
||||
const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
|
||||
if (NT || TL) {
|
||||
llvm::Value *NumTeamsVal = (NT) ? CGF.Builder.CreateIntCast(
|
||||
CGF.EmitScalarExpr(NT->getNumTeams()), CGF.CGM.Int32Ty,
|
||||
/* isSigned = */ true) :
|
||||
CGF.Builder.getInt32(0);
|
||||
|
||||
llvm::Value *ThreadLimitVal = (TL) ? CGF.Builder.CreateIntCast(
|
||||
CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty,
|
||||
/* isSigned = */ true) :
|
||||
CGF.Builder.getInt32(0);
|
||||
|
||||
CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal,
|
||||
ThreadLimitVal, S.getLocStart());
|
||||
}
|
||||
|
||||
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
|
||||
CapturedVars);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
// Emit parallel region as a standalone region.
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
|
||||
OMPPrivateScope PrivateScope(CGF);
|
||||
(void)PrivateScope.Privatize();
|
||||
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
};
|
||||
emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPCancellationPointDirective(
|
||||
|
|
|
@ -2221,6 +2221,8 @@ public:
|
|||
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
|
||||
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
|
||||
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
|
||||
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
|
||||
QualType ReturnQTy);
|
||||
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
|
||||
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
|
||||
SmallVectorImpl<llvm::Value *> &CapturedVars);
|
||||
|
|
|
@ -207,4 +207,136 @@ int teams_template_struct(void) {
|
|||
|
||||
}
|
||||
#endif // CK3
|
||||
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
|
||||
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
|
||||
|
||||
#ifdef CK4
|
||||
|
||||
// CK4-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
|
||||
// CK4-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
|
||||
// CK4-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
// CK4-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;main;[[@LINE+14]];9;;\00"
|
||||
// CK4-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;tmain;[[@LINE+7]];9;;\00"
|
||||
|
||||
template <typename T>
|
||||
int tmain(T argc) {
|
||||
#pragma omp target
|
||||
#pragma omp teams
|
||||
argc = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
#pragma omp target
|
||||
#pragma omp teams
|
||||
argc = 0;
|
||||
return tmain(argv);
|
||||
}
|
||||
|
||||
// CK4: define {{.*}}void @{{[^,]+}}(i{{.+}} %[[ARGC:.+]])
|
||||
// CK4: [[ARGCADDR:%.+]] = alloca i{{.+}}
|
||||
// CK4: store i{{.+}} %[[ARGC]], i{{.+}}* [[ARGCADDR]]
|
||||
// CK4-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
|
||||
// CK4-64: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* {{.+}} to void (i32*, i32*, ...)*), i32* [[CONV]])
|
||||
// CK4-32: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* {{.+}} to void (i32*, i32*, ...)*), i32* [[ARGCADDR]])
|
||||
// CK4: ret void
|
||||
// CK4-NEXT: }
|
||||
|
||||
// CK4: define {{.*}}void @{{[^,]+}}(i8*** dereferenceable({{.}}) [[ARGC1:%.+]])
|
||||
// CK4: [[ARGCADDR1:%.+]] = alloca i8***
|
||||
// CK4: store i8*** [[ARGC1]], i8**** [[ARGCADDR1]]
|
||||
// CK4: [[CONV1:%.+]] = load i8***, i8**** [[ARGCADDR1]]
|
||||
// CK4: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* {{.+}} to void (i32*, i32*, ...)*), i8*** [[CONV1]])
|
||||
|
||||
|
||||
#endif // CK4
|
||||
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64
|
||||
// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-64
|
||||
// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -DCK5 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-32
|
||||
// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -DCK5 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK5 --check-prefix CK5-32
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifdef CK5
|
||||
|
||||
// CK5-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
|
||||
// CK5-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
|
||||
// CK5-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
// CK5-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;main;[[@LINE+14]];9;;\00"
|
||||
// CK5-DEBUG-DAG: [[LOC2:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}teams_codegen.cpp;tmain;[[@LINE+7]];9;;\00"
|
||||
|
||||
template <typename T>
|
||||
int tmain(T argc) {
|
||||
int a = 10;
|
||||
int b = 5;
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(a) thread_limit(b)
|
||||
{
|
||||
argc = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
int a = 20;
|
||||
int b = 5;
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(a) thread_limit(b)
|
||||
{
|
||||
argc = 0;
|
||||
}
|
||||
return tmain(argv);
|
||||
}
|
||||
|
||||
// CK5: define {{.*}}void @{{[^,]+}}(i{{.+}} [[AP:%.+]], i{{.+}} [[BP:%.+]], i{{.+}} [[ARGC:.+]])
|
||||
// CK5: [[AADDR:%.+]] = alloca i{{.+}}
|
||||
// CK5: [[BADDR:%.+]] = alloca i{{.+}}
|
||||
// CK5: [[ARGCADDR:%.+]] = alloca i{{.+}}
|
||||
// CK5: [[GBL_TH_NUM:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEF_LOC_0]])
|
||||
// CK5: store i{{.+}} [[AP]], i{{.+}}* [[AADDR]]
|
||||
// CK5: store i{{.+}} [[BP]], i{{.+}}* [[BADDR]]
|
||||
// CK5: store i{{.+}} [[ARGC]], i{{.+}}* [[ARGCADDR]]
|
||||
// CK5-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
|
||||
// CK5-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
|
||||
// CK5-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
|
||||
// CK5-64: [[ACONVVAL:%.+]] = load i32, i32* [[ACONV]]
|
||||
// CK5-64: [[BCONVVAL:%.+]] = load i32, i32* [[BCONV]]
|
||||
// CK5-32: [[ACONVVAL:%.+]] = load i32, i32* [[AADDR]]
|
||||
// CK5-32: [[BCONVVAL:%.+]] = load i32, i32* [[BADDR]]
|
||||
// CK5: {{.+}} = call i32 @__kmpc_push_num_teams(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TH_NUM]], i32 [[ACONVVAL]], i32 [[BCONVVAL]])
|
||||
// CK5-64: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV]])
|
||||
// CK5-32: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[ARGCADDR]])
|
||||
|
||||
// CK5: define {{.*}}void @{{[^,]+}}(i{{.+}} dereferenceable({{.+}}) [[AP:%.+]], i{{.+}} dereferenceable({{.+}}) [[BP:%.+]], i{{.+}} dereferenceable({{.+}}) [[ARGC:%.+]])
|
||||
// CK5: [[AADDR:%.+]] = alloca i{{.+}}
|
||||
// CK5: [[BADDR:%.+]] = alloca i{{.+}}
|
||||
// CK5: [[ARGCADDR:%.+]] = alloca i{{.+}}
|
||||
// CK5: [[GBL_TH_NUM:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEF_LOC_0]])
|
||||
// CK5: store i{{.+}} [[AP]], i{{.+}}* [[AADDR]]
|
||||
// CK5: store i{{.+}} [[BP]], i{{.+}}* [[BADDR]]
|
||||
// CK5: store i{{.+}} [[ARGC]], i{{.+}}* [[ARGCADDR]]
|
||||
// CK5: [[A_ADDR_VAL:%.+]] = load i32*, i32** [[AADDR]]
|
||||
// CK5: [[B_ADDR_VAL:%.+]] = load i32*, i32** [[BADDR]]
|
||||
// CK5: [[ARGC_ADDR_VAL:%.+]] = load i{{.+}}, i{{.+}}* [[ARGCADDR]]
|
||||
// CK5: [[A_VAL:%.+]] = load i32, i32* [[A_ADDR_VAL]]
|
||||
// CK5: [[B_VAL:%.+]] = load i32, i32* [[B_ADDR_VAL]]
|
||||
// CK5: {{.+}} = call i32 @__kmpc_push_num_teams(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TH_NUM]], i32 [[A_VAL]], i32 [[B_VAL]])
|
||||
// CK5: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC_0]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i{{.+}})* @.omp_outlined.{{.+}} to void (i32*, i32*, ...)*), i{{.+}} [[ARGC_ADDR_VAL]])
|
||||
// CK5: ret void
|
||||
// CK5-NEXT: }
|
||||
|
||||
#endif // CK5
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue