forked from OSchip/llvm-project
[OpenMP] Code generation for teams - kernel launching
Summary: This patch implements the launching of a target region in the presence of a nested teams region, i.e calls tgt_target_teams with the required arguments gathered from the enclosed teams directive. The actual codegen of the region enclosed by the teams construct will be contributed in a separate patch. Reviewers: hfinkel, arpith-jacob, kkwli0, carlo.bertolli, ABataev Subscribers: cfe-commits, caomhin, fraggamuffin Differential Revision: http://reviews.llvm.org/D17019 llvm-svn: 262625
This commit is contained in:
parent
b840a6d6f4
commit
b68e2db8f9
|
@ -252,6 +252,70 @@ private:
|
|||
StringRef HelperName;
|
||||
};
|
||||
|
||||
static void EmptyCodeGen(CodeGenFunction &) {
|
||||
llvm_unreachable("No codegen for expressions");
|
||||
}
|
||||
/// \brief API for generation of expressions captured in a innermost OpenMP
|
||||
/// region.
|
||||
class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
|
||||
public:
|
||||
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
|
||||
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
|
||||
OMPD_unknown,
|
||||
/*HasCancel=*/false),
|
||||
PrivScope(CGF) {
|
||||
// Make sure the globals captured in the provided statement are local by
|
||||
// using the privatization logic. We assume the same variable is not
|
||||
// captured more than once.
|
||||
for (auto &C : CS.captures()) {
|
||||
if (!C.capturesVariable() && !C.capturesVariableByCopy())
|
||||
continue;
|
||||
|
||||
const VarDecl *VD = C.getCapturedVar();
|
||||
if (VD->isLocalVarDeclOrParm())
|
||||
continue;
|
||||
|
||||
DeclRefExpr DRE(const_cast<VarDecl *>(VD),
|
||||
/*RefersToEnclosingVariableOrCapture=*/false,
|
||||
VD->getType().getNonReferenceType(), VK_LValue,
|
||||
SourceLocation());
|
||||
PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
|
||||
return CGF.EmitLValue(&DRE).getAddress();
|
||||
});
|
||||
}
|
||||
(void)PrivScope.Privatize();
|
||||
}
|
||||
|
||||
/// \brief Lookup the captured field decl for a variable.
|
||||
const FieldDecl *lookup(const VarDecl *VD) const override {
|
||||
if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
|
||||
return FD;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// \brief Emit the captured statement body.
|
||||
void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
|
||||
llvm_unreachable("No body for expressions");
|
||||
}
|
||||
|
||||
/// \brief Get a variable or parameter for storing global thread id
|
||||
/// inside OpenMP construct.
|
||||
const VarDecl *getThreadIDVariable() const override {
|
||||
llvm_unreachable("No thread id for expressions");
|
||||
}
|
||||
|
||||
/// \brief Get the name of the capture helper.
|
||||
StringRef getHelperName() const override {
|
||||
llvm_unreachable("No helper name for expressions");
|
||||
}
|
||||
|
||||
static bool classof(const CGCapturedStmtInfo *Info) { return false; }
|
||||
|
||||
private:
|
||||
/// Private scope to capture global variables.
|
||||
CodeGenFunction::OMPPrivateScope PrivScope;
|
||||
};
|
||||
|
||||
/// \brief RAII for emitting code of OpenMP constructs.
|
||||
class InlinedOpenMPRegionRAII {
|
||||
CodeGenFunction &CGF;
|
||||
|
@ -481,6 +545,10 @@ enum OpenMPRTLFunction {
|
|||
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
|
||||
// *arg_types);
|
||||
OMPRTL__tgt_target,
|
||||
// Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
|
||||
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
|
||||
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
|
||||
OMPRTL__tgt_target_teams,
|
||||
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
|
||||
OMPRTL__tgt_register_lib,
|
||||
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
|
||||
|
@ -1153,6 +1221,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
|
|||
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__tgt_target_teams: {
|
||||
// Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
|
||||
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
|
||||
// int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
|
||||
llvm::Type *TypeParams[] = {CGM.Int32Ty,
|
||||
CGM.VoidPtrTy,
|
||||
CGM.Int32Ty,
|
||||
CGM.VoidPtrPtrTy,
|
||||
CGM.VoidPtrPtrTy,
|
||||
CGM.SizeTy->getPointerTo(),
|
||||
CGM.Int32Ty->getPointerTo(),
|
||||
CGM.Int32Ty,
|
||||
CGM.Int32Ty};
|
||||
llvm::FunctionType *FnTy =
|
||||
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
|
||||
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
|
||||
break;
|
||||
}
|
||||
case OMPRTL__tgt_register_lib: {
|
||||
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
|
||||
QualType ParamTy =
|
||||
|
@ -3972,6 +4058,102 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
|
|||
DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
|
||||
}
|
||||
|
||||
/// \brief Emit the num_teams clause of an enclosed teams directive at the
|
||||
/// target region scope. If there is no teams directive associated with the
|
||||
/// target directive, or if there is no num_teams clause associated with the
|
||||
/// enclosed teams directive, return nullptr.
|
||||
static llvm::Value *
|
||||
emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
||||
CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D) {
|
||||
|
||||
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
|
||||
"teams directive expected to be "
|
||||
"emitted only for the host!");
|
||||
|
||||
// FIXME: For the moment we do not support combined directives with target and
|
||||
// teams, so we do not expect to get any num_teams clause in the provided
|
||||
// directive. Once we support that, this assertion can be replaced by the
|
||||
// actual emission of the clause expression.
|
||||
assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
|
||||
"Not expecting clause in directive.");
|
||||
|
||||
// If the current target region has a teams region enclosed, we need to get
|
||||
// the number of teams to pass to the runtime function call. This is done
|
||||
// by generating the expression in a inlined region. This is required because
|
||||
// the expression is captured in the enclosing target environment when the
|
||||
// teams directive is not combined with target.
|
||||
|
||||
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
||||
|
||||
// FIXME: Accommodate other combined directives with teams when they become
|
||||
// available.
|
||||
if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
|
||||
if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
|
||||
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
||||
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
|
||||
return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
|
||||
/*IsSigned=*/true);
|
||||
}
|
||||
|
||||
// If we have an enclosed teams directive but no num_teams clause we use
|
||||
// the default value 0.
|
||||
return CGF.Builder.getInt32(0);
|
||||
}
|
||||
|
||||
// No teams associated with the directive.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// \brief Emit the thread_limit clause of an enclosed teams directive at the
|
||||
/// target region scope. If there is no teams directive associated with the
|
||||
/// target directive, or if there is no thread_limit clause associated with the
|
||||
/// enclosed teams directive, return nullptr.
|
||||
static llvm::Value *
|
||||
emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
||||
CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D) {
|
||||
|
||||
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
|
||||
"teams directive expected to be "
|
||||
"emitted only for the host!");
|
||||
|
||||
// FIXME: For the moment we do not support combined directives with target and
|
||||
// teams, so we do not expect to get any thread_limit clause in the provided
|
||||
// directive. Once we support that, this assertion can be replaced by the
|
||||
// actual emission of the clause expression.
|
||||
assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
|
||||
"Not expecting clause in directive.");
|
||||
|
||||
// If the current target region has a teams region enclosed, we need to get
|
||||
// the thread limit to pass to the runtime function call. This is done
|
||||
// by generating the expression in a inlined region. This is required because
|
||||
// the expression is captured in the enclosing target environment when the
|
||||
// teams directive is not combined with target.
|
||||
|
||||
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
|
||||
|
||||
// FIXME: Accommodate other combined directives with teams when they become
|
||||
// available.
|
||||
if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
|
||||
if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
|
||||
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
||||
llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
|
||||
return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
|
||||
/*IsSigned=*/true);
|
||||
}
|
||||
|
||||
// If we have an enclosed teams directive but no thread_limit clause we use
|
||||
// the default value 0.
|
||||
return CGF.Builder.getInt32(0);
|
||||
}
|
||||
|
||||
// No teams associated with the directive.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
||||
const OMPExecutableDirective &D,
|
||||
llvm::Value *OutlinedFn,
|
||||
|
@ -4100,7 +4282,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
|||
// Fill up the pointer arrays and transfer execution to the device.
|
||||
auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
|
||||
hasVLACaptures, Device, OutlinedFnID, OffloadError,
|
||||
OffloadErrorQType](CodeGenFunction &CGF) {
|
||||
OffloadErrorQType, &D](CodeGenFunction &CGF) {
|
||||
unsigned PointerNumVal = BasePointers.size();
|
||||
llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
|
||||
llvm::Value *BasePointersArray;
|
||||
|
@ -4240,11 +4422,34 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
|
|||
else
|
||||
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
|
||||
|
||||
llvm::Value *OffloadingArgs[] = {
|
||||
DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
|
||||
PointersArray, SizesArray, MapTypesArray};
|
||||
auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
|
||||
OffloadingArgs);
|
||||
// Return value of the runtime offloading call.
|
||||
llvm::Value *Return;
|
||||
|
||||
auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D);
|
||||
auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D);
|
||||
|
||||
// If we have NumTeams defined this means that we have an enclosed teams
|
||||
// region. Therefore we also expect to have ThreadLimit defined. These two
|
||||
// values should be defined in the presence of a teams directive, regardless
|
||||
// of having any clauses associated. If the user is using teams but no
|
||||
// clauses, these two values will be the default that should be passed to
|
||||
// the runtime library - a 32-bit integer with the value zero.
|
||||
if (NumTeams) {
|
||||
assert(ThreadLimit && "Thread limit expression should be available along "
|
||||
"with number of teams.");
|
||||
llvm::Value *OffloadingArgs[] = {
|
||||
DeviceID, OutlinedFnID, PointerNum,
|
||||
BasePointersArray, PointersArray, SizesArray,
|
||||
MapTypesArray, NumTeams, ThreadLimit};
|
||||
Return = CGF.EmitRuntimeCall(
|
||||
createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
|
||||
} else {
|
||||
llvm::Value *OffloadingArgs[] = {
|
||||
DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
|
||||
PointersArray, SizesArray, MapTypesArray};
|
||||
Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
|
||||
OffloadingArgs);
|
||||
}
|
||||
|
||||
CGF.EmitStoreOfScalar(Return, OffloadError);
|
||||
};
|
||||
|
|
|
@ -2716,8 +2716,12 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
|
|||
CapturedVars);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
|
||||
llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
|
||||
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
|
||||
OMPLexicalScope Scope(*this, S);
|
||||
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
|
||||
|
||||
// FIXME: We should fork teams here instead of just emit the statement.
|
||||
EmitStmt(CS.getCapturedStmt());
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPCancellationPointDirective(
|
||||
|
|
|
@ -0,0 +1,210 @@
|
|||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
#ifdef CK1
|
||||
|
||||
int Gbla;
|
||||
long long Gblb;
|
||||
int &Gblc = Gbla;
|
||||
|
||||
// CK1-LABEL: teams_argument_global_local
|
||||
int teams_argument_global_local(int a){
|
||||
int comp = 1;
|
||||
|
||||
int la = 23;
|
||||
float lc = 25.0;
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
|
||||
// CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
|
||||
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
|
||||
// CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
|
||||
|
||||
// CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(la)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
|
||||
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
|
||||
// CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
|
||||
|
||||
// CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams thread_limit(la)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
|
||||
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
|
||||
|
||||
// CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
|
||||
// CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
|
||||
// CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
|
||||
|
||||
// CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
|
||||
// CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
|
||||
// CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
|
||||
// CK1-DAG: [[TLD]] = load float, float* %{{.+}},
|
||||
// CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
|
||||
|
||||
// CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
|
||||
// CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
|
||||
|
||||
// CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1
|
||||
// CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
|
||||
|
||||
// CK1-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 2
|
||||
// CK1-DAG: [[TLA]] = load i32, i32* @Gbla,
|
||||
|
||||
// CK1: call void @{{.+}}(i{{.+}} {{.+}}
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(Gblc+1) thread_limit(Gblc+2)
|
||||
{
|
||||
comp += Gblc;
|
||||
}
|
||||
|
||||
return comp;
|
||||
}
|
||||
|
||||
#endif // CK1
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
#ifdef CK2
|
||||
|
||||
// CK2-DAG: [[SSI:%.+]] = type { i32, float }
|
||||
// CK2-DAG: [[SSL:%.+]] = type { i64, float }
|
||||
template <typename T>
|
||||
struct SS{
|
||||
T a;
|
||||
float b;
|
||||
};
|
||||
|
||||
SS<int> Gbla;
|
||||
SS<long long> Gblb;
|
||||
|
||||
// CK2-LABEL: teams_template_arg
|
||||
int teams_template_arg(void) {
|
||||
int comp = 1;
|
||||
|
||||
SS<int> la;
|
||||
SS<long long> lb;
|
||||
|
||||
// CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
|
||||
|
||||
// CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0)
|
||||
|
||||
// CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
|
||||
// CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
|
||||
// CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
|
||||
// CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
|
||||
|
||||
// CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
|
||||
// CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
|
||||
|
||||
// CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
|
||||
// CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0),
|
||||
|
||||
// CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
|
||||
// CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
|
||||
// CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
|
||||
// CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
|
||||
|
||||
// CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
return comp;
|
||||
}
|
||||
#endif // CK2
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
|
||||
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
|
||||
#ifdef CK3
|
||||
|
||||
// CK3: [[SSI:%.+]] = type { i32, float }
|
||||
// CK3-LABEL: teams_template_struct
|
||||
|
||||
template <typename T, int X, long long Y>
|
||||
struct SS{
|
||||
T a;
|
||||
float b;
|
||||
|
||||
int foo(void) {
|
||||
int comp = 1;
|
||||
|
||||
// CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
|
||||
|
||||
// CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
|
||||
// CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
|
||||
// CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
|
||||
|
||||
// CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(a) thread_limit(X)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
|
||||
// CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
|
||||
|
||||
// CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
|
||||
// CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
|
||||
// CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
|
||||
// CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[THIS:%[^,]+]], i32 0, i32 1
|
||||
|
||||
// CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams num_teams(Y) thread_limit((int)b+X)
|
||||
{
|
||||
++comp;
|
||||
}
|
||||
return comp;
|
||||
}
|
||||
};
|
||||
|
||||
int teams_template_struct(void) {
|
||||
SS<int, 123, 456> V;
|
||||
return V.foo();
|
||||
|
||||
}
|
||||
#endif // CK3
|
||||
#endif
|
Loading…
Reference in New Issue