forked from OSchip/llvm-project
[OpenMP] Codegen support for 'target teams' on the host.
This patch adds support for codegen of 'target teams' on the host. This combined directive has two captured statements, one for the 'target' region and the other for the 'teams' region. The target teams region is offloaded using the __tgt_target_teams() call. The patch sets the number of teams as an argument to this call. Reviewers: ABataev Differential Revision: https://reviews.llvm.org/D29084 llvm-svn: 293005
This commit is contained in:
parent
ae7e0da4f8
commit
99a1e0eba5
|
@ -875,8 +875,11 @@ void clang::getOpenMPCaptureRegions(
|
|||
case OMPD_parallel_sections:
|
||||
CaptureRegions.push_back(OMPD_parallel);
|
||||
break;
|
||||
case OMPD_teams:
|
||||
case OMPD_target_teams:
|
||||
CaptureRegions.push_back(OMPD_target);
|
||||
CaptureRegions.push_back(OMPD_teams);
|
||||
break;
|
||||
case OMPD_teams:
|
||||
case OMPD_simd:
|
||||
case OMPD_for:
|
||||
case OMPD_for_simd:
|
||||
|
|
|
@ -4911,18 +4911,28 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|||
"teams directive expected to be "
|
||||
"emitted only for the host!");
|
||||
|
||||
auto &Bld = CGF.Builder;
|
||||
|
||||
// If the target directive is combined with a teams directive:
|
||||
// Return the value in the num_teams clause, if any.
|
||||
// Otherwise, return 0 to denote the runtime default.
|
||||
if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
|
||||
if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
|
||||
CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
|
||||
auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
|
||||
/*IgnoreResultAssign*/ true);
|
||||
return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
|
||||
/*IsSigned=*/true);
|
||||
}
|
||||
|
||||
// The default value is 0.
|
||||
return Bld.getInt32(0);
|
||||
}
|
||||
|
||||
// If the target directive is combined with a parallel directive but not a
|
||||
// teams directive, start one team.
|
||||
if (isOpenMPParallelDirective(D.getDirectiveKind()) &&
|
||||
!isOpenMPTeamsDirective(D.getDirectiveKind()))
|
||||
return CGF.Builder.getInt32(1);
|
||||
|
||||
// FIXME: For the moment we do not support combined directives with target and
|
||||
// teams, so we do not expect to get any num_teams clause in the provided
|
||||
// directive. Once we support that, this assertion can be replaced by the
|
||||
// actual emission of the clause expression.
|
||||
assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
|
||||
"Not expecting clause in directive.");
|
||||
if (isOpenMPParallelDirective(D.getDirectiveKind()))
|
||||
return Bld.getInt32(1);
|
||||
|
||||
// If the current target region has a teams region enclosed, we need to get
|
||||
// the number of teams to pass to the runtime function call. This is done
|
||||
|
@ -4940,13 +4950,13 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|||
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
|
||||
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
|
||||
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
|
||||
return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
|
||||
/*IsSigned=*/true);
|
||||
return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
|
||||
/*IsSigned=*/true);
|
||||
}
|
||||
|
||||
// If we have an enclosed teams directive but no num_teams clause we use
|
||||
// the default value 0.
|
||||
return CGF.Builder.getInt32(0);
|
||||
return Bld.getInt32(0);
|
||||
}
|
||||
|
||||
// No teams associated with the directive.
|
||||
|
@ -4986,9 +4996,20 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|||
//
|
||||
// If this is not a teams directive return nullptr.
|
||||
|
||||
if (isOpenMPParallelDirective(D.getDirectiveKind())) {
|
||||
if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
|
||||
isOpenMPParallelDirective(D.getDirectiveKind())) {
|
||||
llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
|
||||
llvm::Value *NumThreadsVal = nullptr;
|
||||
llvm::Value *ThreadLimitVal = nullptr;
|
||||
|
||||
if (const auto *ThreadLimitClause =
|
||||
D.getSingleClause<OMPThreadLimitClause>()) {
|
||||
CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
|
||||
auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
|
||||
/*IgnoreResultAssign*/ true);
|
||||
ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
|
||||
/*IsSigned=*/true);
|
||||
}
|
||||
|
||||
if (const auto *NumThreadsClause =
|
||||
D.getSingleClause<OMPNumThreadsClause>()) {
|
||||
|
@ -5000,15 +5021,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
|
|||
Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
|
||||
}
|
||||
|
||||
return NumThreadsVal ? NumThreadsVal : DefaultThreadLimitVal;
|
||||
}
|
||||
// Select the lesser of thread_limit and num_threads.
|
||||
if (NumThreadsVal)
|
||||
ThreadLimitVal = ThreadLimitVal
|
||||
? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
|
||||
ThreadLimitVal),
|
||||
NumThreadsVal, ThreadLimitVal)
|
||||
: NumThreadsVal;
|
||||
|
||||
// FIXME: For the moment we do not support combined directives with target and
|
||||
// teams, so we do not expect to get any thread_limit clause in the provided
|
||||
// directive. Once we support that, this assertion can be replaced by the
|
||||
// actual emission of the clause expression.
|
||||
assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
|
||||
"Not expecting clause in directive.");
|
||||
// Set default value passed to the runtime if either teams or a target
|
||||
// parallel type directive is found but no clause is specified.
|
||||
if (!ThreadLimitVal)
|
||||
ThreadLimitVal = DefaultThreadLimitVal;
|
||||
|
||||
return ThreadLimitVal;
|
||||
}
|
||||
|
||||
// If the current target region has a teams region enclosed, we need to get
|
||||
// the thread limit to pass to the runtime function call. This is done
|
||||
|
@ -6217,6 +6244,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
|
|||
CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
|
||||
CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
|
||||
break;
|
||||
case Stmt::OMPTargetTeamsDirectiveClass:
|
||||
CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
|
||||
CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
|
||||
}
|
||||
|
|
|
@ -98,6 +98,22 @@ public:
|
|||
/*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
|
||||
};
|
||||
|
||||
/// Lexical scope for OpenMP teams construct, that handles correct codegen
|
||||
/// for captured expressions.
|
||||
class OMPTeamsScope final : public OMPLexicalScope {
|
||||
// Decides the EmitPreInitStmt argument forwarded to the OMPLexicalScope base:
// true only when the directive kind is a teams directive that is NOT also a
// target execution directive.
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
|
||||
OpenMPDirectiveKind Kind = S.getDirectiveKind();
|
||||
return !isOpenMPTargetExecutionDirective(Kind) &&
|
||||
isOpenMPTeamsDirective(Kind);
|
||||
}
|
||||
|
||||
public:
|
||||
// Builds the base scope with AsInlined fixed to false and EmitPreInitStmt
// computed from the directive kind by the helper above.
OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
|
||||
: OMPLexicalScope(CGF, S,
|
||||
/*AsInlined=*/false,
|
||||
/*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
|
||||
};
|
||||
|
||||
/// Private scope for OpenMP loop-based directives, that supports capturing
|
||||
/// of used expression from loop statement.
|
||||
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
|
||||
|
@ -2018,15 +2034,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
|
|||
});
|
||||
}
|
||||
|
||||
// Inlined emission of a 'target teams' directive: the statement captured by
// the directive's associated CapturedStmt is emitted directly through the
// OpenMP runtime's emitInlinedDirective with kind OMPD_target_teams.
// NOTE(review): this definition sits in a hunk whose header shrinks from 15 to
// 6 lines, so it appears to be the version this change removes - confirm
// against the actual diff.
void CodeGenFunction::EmitOMPTargetTeamsDirective(
|
||||
const OMPTargetTeamsDirective &S) {
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(
|
||||
*this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
CGF.EmitStmt(
|
||||
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
});
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
|
||||
const OMPTargetTeamsDistributeDirective &S) {
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(
|
||||
|
@ -3519,9 +3526,8 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
|
|||
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
|
||||
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
|
||||
|
||||
const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
|
||||
const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
|
||||
const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
|
||||
const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
|
||||
const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
|
||||
if (NT || TL) {
|
||||
Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
|
||||
Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
|
||||
|
@ -3530,7 +3536,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
|
|||
S.getLocStart());
|
||||
}
|
||||
|
||||
OMPLexicalScope Scope(CGF, S);
|
||||
OMPTeamsScope Scope(CGF, S);
|
||||
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
|
||||
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
|
||||
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
|
||||
|
@ -3549,6 +3555,39 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
|
|||
emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
|
||||
}
|
||||
|
||||
// Emits the body of a 'target teams' region.
//
// \param CGF    Code generation state used for the emission.
// \param Action Pre/post action; its Enter() hook is invoked before the teams
//               directive is emitted.
// \param S      The 'target teams' directive being lowered.
//
// The statement captured for the OMPD_teams region is looked up once and its
// body is emitted from the callback handed to emitCommonOMPTeamsDirective.
static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
|
||||
const OMPTargetTeamsDirective &S) {
|
||||
auto *CS = S.getCapturedStmt(OMPD_teams);
|
||||
Action.Enter(CGF);
|
||||
// The callback captures the CapturedStmt pointer by value and ignores its own
// PrePostActionTy argument.
auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
// TODO: Add support for clauses.
|
||||
CGF.EmitStmt(CS->getCapturedStmt());
|
||||
};
|
||||
emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
|
||||
}
|
||||
|
||||
// Emits the outlined function for a 'target teams' directive as an offload
// entry.
//
// \param CGM        Module whose OpenMP runtime performs the outlining.
// \param ParentName Forwarded unchanged to emitTargetOutlinedFunction.
// \param S          The 'target teams' directive to emit.
void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
|
||||
CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetTeamsDirective &S) {
|
||||
// Region body generator; defers to the shared emitTargetTeamsRegion helper.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
emitTargetTeamsRegion(CGF, Action, S);
|
||||
};
|
||||
// Left uninitialized here and asserted non-null after the call below -
// presumably filled in by emitTargetOutlinedFunction; confirm against its
// declaration.
llvm::Function *Fn;
|
||||
llvm::Constant *Addr;
|
||||
// Emit target region as a standalone region.
|
||||
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
|
||||
S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
|
||||
assert(Fn && Addr && "Target device function emission failed.");
|
||||
}
|
||||
|
||||
// Emits a 'target teams' directive by handing a region generator to
// emitCommonOMPTargetDirective; the generator defers to the shared
// emitTargetTeamsRegion helper, the same helper used by
// EmitOMPTargetTeamsDeviceFunction.
//
// \param S The 'target teams' directive to emit.
void CodeGenFunction::EmitOMPTargetTeamsDirective(
|
||||
const OMPTargetTeamsDirective &S) {
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
emitTargetTeamsRegion(CGF, Action, S);
|
||||
};
|
||||
emitCommonOMPTargetDirective(*this, S, CodeGen);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPCancellationPointDirective(
|
||||
const OMPCancellationPointDirective &S) {
|
||||
CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
|
||||
|
|
|
@ -2711,6 +2711,9 @@ public:
|
|||
static void
|
||||
EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetParallelDirective &S);
|
||||
static void
|
||||
EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
|
||||
const OMPTargetTeamsDirective &S);
|
||||
/// \brief Emit inner loop of the worksharing/simd construct.
|
||||
///
|
||||
/// \param S Directive, for which the inner loop must be emitted.
|
||||
|
|
|
@ -1594,8 +1594,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
case OMPD_parallel_for:
|
||||
case OMPD_parallel_for_simd:
|
||||
case OMPD_parallel_sections:
|
||||
case OMPD_teams:
|
||||
case OMPD_target_teams: {
|
||||
case OMPD_teams: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||
QualType KmpInt32PtrTy =
|
||||
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
||||
|
@ -1608,6 +1607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
Params);
|
||||
break;
|
||||
}
|
||||
case OMPD_target_teams:
|
||||
case OMPD_target_parallel: {
|
||||
Sema::CapturedParamNameType ParamsTarget[] = {
|
||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||
|
@ -1618,14 +1618,15 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||
QualType KmpInt32PtrTy =
|
||||
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
||||
Sema::CapturedParamNameType ParamsParallel[] = {
|
||||
Sema::CapturedParamNameType ParamsTeamsOrParallel[] = {
|
||||
std::make_pair(".global_tid.", KmpInt32PtrTy),
|
||||
std::make_pair(".bound_tid.", KmpInt32PtrTy),
|
||||
std::make_pair(StringRef(), QualType()) // __context with shared vars
|
||||
};
|
||||
// Start a captured region for 'parallel'.
|
||||
// Start a captured region for 'teams' or 'parallel'. Both regions have
|
||||
// the same implicit parameters.
|
||||
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
|
||||
ParamsParallel);
|
||||
ParamsTeamsOrParallel);
|
||||
break;
|
||||
}
|
||||
case OMPD_simd:
|
||||
|
|
|
@ -0,0 +1,802 @@
|
|||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
|
||||
// Test target codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
|
||||
// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
|
||||
// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
|
||||
// CHECK-DAG: [[S1:%.+]] = type { double }
|
||||
// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
|
||||
// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
|
||||
// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
|
||||
|
||||
// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
|
||||
|
||||
// We have 8 target regions, but only 7 that actually will generate offloading
|
||||
// code, only 6 will have mapped arguments, and only 4 have all-constant map
|
||||
// sizes.
|
||||
|
||||
// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [1 x i{{32|64}}] [i[[SZ:32|64]] 2]
|
||||
// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2]
|
||||
// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i32] [i32 288, i32 288]
|
||||
// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr constant [9 x i32] [i32 288, i32 35, i32 288, i32 35, i32 35, i32 288, i32 288, i32 35, i32 35]
|
||||
// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 40]
|
||||
// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i32] [i32 288, i32 288, i32 35]
|
||||
// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [4 x i[[SZ]]] [i[[SZ]] 4, i[[SZ]] 2, i[[SZ]] 1, i[[SZ]] 40]
|
||||
// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [4 x i32] [i32 288, i32 288, i32 288, i32 35]
|
||||
// CHECK-DAG: [[MAPT7:@.+]] = private unnamed_addr constant [5 x i32] [i32 35, i32 288, i32 288, i32 288, i32 35]
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
// CHECK-DAG: @{{.*}} = private constant i8 0
|
||||
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK: @{{.+}} = constant [[ENTTY]]
|
||||
// TCHECK-NOT: @{{.+}} = constant [[ENTTY]]
|
||||
|
||||
// Check if offloading descriptor is created.
|
||||
// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
|
||||
// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
|
||||
// CHECK: [[DEVBEGIN:@.+]] = external constant i8
|
||||
// CHECK: [[DEVEND:@.+]] = external constant i8
|
||||
// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
|
||||
// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
|
||||
|
||||
// Check target registration is registered as a Ctor.
|
||||
// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
|
||||
|
||||
|
||||
// Two-member aggregate used by the test body; foo() below declares a
// TT<long long, char> and updates both members inside a target teams region.
template<typename tx, typename ty>
|
||||
struct TT{
|
||||
tx X;
|
||||
||||
ty Y;
|
||||
};
|
||||
|
||||
// CHECK: define {{.*}}[[FOO:@.+]](
|
||||
int foo(int n) {
|
||||
int a = 0;
|
||||
short aa = 0;
|
||||
float b[10];
|
||||
float bn[n];
|
||||
double c[5][10];
|
||||
double cn[5][n];
|
||||
TT<long long, char> d;
|
||||
|
||||
// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 0, i32 0)
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT0:@.+]]()
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
#pragma omp target teams
|
||||
{
|
||||
}
|
||||
|
||||
// CHECK: store i32 0, i32* [[RHV:%.+]], align 4
|
||||
// CHECK: store i32 -1, i32* [[RHV]], align 4
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK: call void [[HVT1:@.+]](i[[SZ]] {{[^,]+}})
|
||||
#pragma omp target teams if(target: 0)
|
||||
{
|
||||
a += 1;
|
||||
}
|
||||
|
||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0)
|
||||
// CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
|
||||
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR]], i32 0, i32 [[IDX0]]
|
||||
// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
|
||||
// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
|
||||
// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8*
|
||||
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}})
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
#pragma omp target teams if(target: 1)
|
||||
{
|
||||
aa += 1;
|
||||
}
|
||||
|
||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
|
||||
// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
|
||||
// CHECK: [[IFTHEN]]
|
||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0)
|
||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||
|
||||
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0
|
||||
// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
|
||||
// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
|
||||
// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] %{{.+}} to i8*
|
||||
|
||||
// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1
|
||||
// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1
|
||||
// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
|
||||
// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
|
||||
// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] %{{.+}} to i8*
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK-NEXT: br label %[[IFEND:.+]]
|
||||
|
||||
// CHECK: [[IFELSE]]
|
||||
// CHECK: store i32 -1, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: br label %[[IFEND:.+]]
|
||||
|
||||
// CHECK: [[IFEND]]
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
#pragma omp target teams if(target: n>10)
|
||||
{
|
||||
a += 1;
|
||||
aa += 1;
|
||||
}
|
||||
|
||||
// We capture 3 VLA sizes in this target region
|
||||
// CHECK-64: [[A_VAL:%.+]] = load i32, i32* %{{.+}},
|
||||
// CHECK-64: [[A_ADDR:%.+]] = bitcast i[[SZ]]* [[A_CADDR:%.+]] to i32*
|
||||
// CHECK-64: store i32 [[A_VAL]], i32* [[A_ADDR]],
|
||||
// CHECK-64: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||
|
||||
// CHECK-32: [[A_VAL:%.+]] = load i32, i32* %{{.+}},
|
||||
// CHECK-32: store i32 [[A_VAL]], i32* [[A_CADDR:%.+]],
|
||||
// CHECK-32: [[A_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CADDR]],
|
||||
|
||||
// CHECK: [[BNSIZE:%.+]] = mul nuw i[[SZ]] [[VLA0:%.+]], 4
|
||||
// CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]]
|
||||
// CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8
|
||||
|
||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
|
||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||
// CHECK: [[TRY]]
|
||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0)
|
||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
|
||||
|
||||
// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX0:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX0]]
|
||||
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX0]]
|
||||
// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX1:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX1]]
|
||||
// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX1]]
|
||||
// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX2:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX2]]
|
||||
// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX2]]
|
||||
// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX3:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX3]]
|
||||
// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX3]]
|
||||
// CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX4:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX4]]
|
||||
// CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX4]]
|
||||
// CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX5:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX5]]
|
||||
// CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX5]]
|
||||
// CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX6:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX6]]
|
||||
// CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX6]]
|
||||
// CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX7:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX7]]
|
||||
// CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX7]]
|
||||
// CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S]], i32 0, i32 [[IDX8:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP]], i32 0, i32 [[IDX8]]
|
||||
// CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P]], i32 0, i32 [[IDX8]]
|
||||
|
||||
// The names below are not necessarily consistent with the names used for the
|
||||
// addresses above as some are repeated.
|
||||
// CHECK-DAG: [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
|
||||
// CHECK-DAG: [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
|
||||
// CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8*
|
||||
// CHECK-DAG: [[P1:%[^,]+]] = inttoptr i[[SZ]] [[VLA1]] to i8*
|
||||
// CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* inttoptr (i[[SZ]] 5 to i8*), i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8*
|
||||
// CHECK-DAG: [[P3:%[^,]+]] = inttoptr i[[SZ]] [[A_CVAL]] to i8*
|
||||
// CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] 40, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP5:%[^,]+]] = bitcast float* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P5:%[^,]+]] = bitcast float* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP5]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P5]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] [[BNSIZE]], i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP6]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P6]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] 400, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP7:%[^,]+]] = bitcast double* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P7:%[^,]+]] = bitcast double* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP7]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P7]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] [[CNSIZE]], i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP8]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P8]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] {{12|16}}, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
#pragma omp target teams if(target: n>20)
|
||||
{
|
||||
a += 1;
|
||||
b[2] += 1.0;
|
||||
bn[3] += 1.0;
|
||||
c[1][2] += 1.0;
|
||||
cn[1][3] += 1.0;
|
||||
d.X += 1;
|
||||
d.Y += 1;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// Check that the offloading functions are emitted and that the arguments are
|
||||
// correct and loaded correctly for the target regions in foo().
|
||||
|
||||
// CHECK: define internal void [[HVT0]]()
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*))
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid.)
|
||||
// CHECK: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
|
||||
// CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}})
|
||||
// Create stack storage and store argument in there.
|
||||
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
|
||||
// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
|
||||
// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
|
||||
// CHECK-64: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i32*
|
||||
// CHECK-64: store i32 [[AA]], i32* [[AA_C]], align
|
||||
// CHECK-32: store i32 [[AA]], i32* [[AA_CASTED]], align
|
||||
// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
|
||||
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||
// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32*
|
||||
// CHECK-64: [[AA:%.+]] = load i32, i32* [[AA_CADDR]], align
|
||||
// CHECK-32: [[AA:%.+]] = load i32, i32* [[AA_ADDR]], align
|
||||
// CHECK: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}})
|
||||
// Create stack storage and store argument in there.
|
||||
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||
// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
|
||||
// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
|
||||
// CHECK: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
|
||||
// CHECK: store i16 [[AA]], i16* [[AA_C]], align
|
||||
// CHECK: [[PARAM:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}})
|
||||
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||
// CHECK: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
|
||||
// CHECK: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
|
||||
// CHECK: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// CHECK: define internal void [[HVT3]]
|
||||
// Create stack storage and store argument in there.
|
||||
// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: [[A_CASTED:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: [[AA_CASTED:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
|
||||
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||
// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
|
||||
// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
|
||||
// CHECK-64-DAG:[[A:%.+]] = load i32, i32* [[A_CADDR]], align
|
||||
// CHECK-32-DAG:[[A:%.+]] = load i32, i32* [[A_ADDR]], align
|
||||
// CHECK-64-DAG:[[A_C:%.+]] = bitcast i[[SZ]]* [[A_CASTED]] to i32*
|
||||
// CHECK-64-DAG:store i32 [[A]], i32* [[A_C]], align
|
||||
// CHECK-32-DAG:store i32 [[A]], i32* [[A_CASTED]], align
|
||||
// CHECK-DAG: [[AA:%.+]] = load i16, i16* [[AA_CADDR]], align
|
||||
// CHECK-DAG: [[AA_C:%.+]] = bitcast i[[SZ]]* [[AA_CASTED]] to i16*
|
||||
// CHECK-DAG: store i16 [[AA]], i16* [[AA_C]], align
|
||||
// CHECK-DAG: [[PARAM1:%.+]] = load i[[SZ]], i[[SZ]]* [[A_CASTED]], align
|
||||
// CHECK-DAG: [[PARAM2:%.+]] = load i[[SZ]], i[[SZ]]* [[AA_CASTED]], align
|
||||
// CHECK-DAG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[PARAM1]], i[[SZ]] [[PARAM2]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED3]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}})
|
||||
// CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align
|
||||
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[A_ADDR]], align
|
||||
// CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align
|
||||
// CHECK-64-DAG:[[A_CADDR:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i32*
|
||||
// CHECK-DAG: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
|
||||
// CHECK: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
// CHECK: define internal void [[HVT4]]
|
||||
// Create local storage for each capture.
|
||||
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_B:%.+]] = alloca [10 x float]*
|
||||
// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_BN:%.+]] = alloca float*
|
||||
// CHECK: [[LOCAL_C:%.+]] = alloca [5 x [10 x double]]*
|
||||
// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_VLA3:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_CN:%.+]] = alloca double*
|
||||
// CHECK: [[LOCAL_D:%.+]] = alloca [[TT]]*
|
||||
// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
|
||||
// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCAL_B]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
|
||||
// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCAL_BN]]
|
||||
// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCAL_C]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_VLA3:%.+]], i[[SZ]]* [[LOCAL_VLA3]]
|
||||
// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCAL_CN]]
|
||||
// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCAL_D]]
|
||||
|
||||
// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
|
||||
// CHECK-DAG: [[REF_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCAL_B]],
|
||||
// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
|
||||
// CHECK-DAG: [[REF_BN:%.+]] = load float*, float** [[LOCAL_BN]],
|
||||
// CHECK-DAG: [[REF_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCAL_C]],
|
||||
// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
|
||||
// CHECK-DAG: [[VAL_VLA3:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA3]],
|
||||
// CHECK-DAG: [[REF_CN:%.+]] = load double*, double** [[LOCAL_CN]],
|
||||
// CHECK-DAG: [[REF_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCAL_D]],
|
||||
|
||||
// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
|
||||
// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
|
||||
// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
|
||||
// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
|
||||
// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
|
||||
// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
|
||||
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 9, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], [10 x float]*, i[[SZ]], float*, [5 x [10 x double]]*, i[[SZ]], i[[SZ]], double*, [[TT]]*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], [10 x float]* [[REF_B]], i[[SZ]] [[VAL_VLA1]], float* [[REF_BN]], [5 x [10 x double]]* [[REF_C]], i[[SZ]] [[VAL_VLA2]], i[[SZ]] [[VAL_VLA3]], double* [[REF_CN]], [[TT]]* [[REF_D]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* %{{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* %{{.+}}, [[TT]]* {{.+}})
|
||||
// To reduce complexity, we're only going as far as validating the signature of the outlined teams function.
|
||||
|
||||
// Function template containing a 'target teams' region whose if clause
// (applied to the target construct) tests n>40. The region updates a scalar of
// the template type (a), a short (aa) and one element of a fixed-size array
// (b); the function returns a. The constant 40 is what the IR patterns for
// this function, later in the file, match against.
template<typename tx>
|
||||
tx ftemplate(int n) {
|
||||
tx a = 0;
|
||||
short aa = 0;
|
||||
tx b[10];
|
||||
|
||||
#pragma omp target teams if(target: n>40)
|
||||
{
|
||||
a += 1;
|
||||
aa += 1;
|
||||
b[2] += 1;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// File-local function containing a 'target teams' region whose if clause
// (applied to the target construct) tests n>50. The region updates three
// scalars of different widths (int a, short aa, char aaa) and one element of
// the int array b; the function returns a. The constant 50 is what the IR
// patterns for this function, later in the file, match against.
static
|
||||
int fstatic(int n) {
|
||||
int a = 0;
|
||||
short aa = 0;
|
||||
char aaa = 0;
|
||||
int b[10];
|
||||
|
||||
#pragma omp target teams if(target: n>50)
|
||||
{
|
||||
a += 1;
|
||||
aa += 1;
|
||||
aaa += 1;
|
||||
b[2] += 1;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// Struct whose member function r1 contains a 'target teams' region that uses
// the enclosing object (through this->a and the member a), a local int (b) and
// a local variable-length array (c, whose second dimension is n).
struct S1 {
|
||||
double a;
|
||||
|
||||
// Runs a 'target teams' region whose if clause (applied to the target
// construct) tests n>60, then returns c[1][1] + b.
int r1(int n){
|
||||
int b = n+1;
|
||||
// Variable-length array: the inner extent depends on the argument n.
short int c[2][n];
|
||||
|
||||
#pragma omp target teams if(target: n>60)
|
||||
{
|
||||
this->a = (double)b + 1.5;
|
||||
// 'a' here is the data member S1::a, pre-incremented and stored into c.
c[1][1] = ++a;
|
||||
}
|
||||
|
||||
return c[1][1] + (int)b;
|
||||
}
|
||||
};
|
||||
|
||||
// CHECK: define {{.*}}@{{.*}}bar{{.*}}
|
||||
// Driver: calls foo, S1::r1, fstatic and ftemplate<int> with the same argument
// n and returns the sum of their results. The directive comments inside the
// body bind the callee symbols that the later function-definition patterns
// refer to.
int bar(int n){
|
||||
int a = 0;
|
||||
|
||||
// CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
|
||||
a += foo(n);
|
||||
|
||||
S1 S;
|
||||
// CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
|
||||
a += S.r1(n);
|
||||
|
||||
// CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
|
||||
a += fstatic(n);
|
||||
|
||||
// CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
|
||||
a += ftemplate<int>(n);
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
//
|
||||
// CHECK: define {{.*}}[[FS1]]
|
||||
//
|
||||
// CHECK: i8* @llvm.stacksave()
|
||||
// CHECK-64: [[B_ADDR:%.+]] = bitcast i[[SZ]]* [[B_CADDR:%.+]] to i32*
|
||||
// CHECK-64: store i32 %{{.+}}, i32* [[B_ADDR]],
|
||||
// CHECK-64: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_CADDR]],
|
||||
|
||||
// CHECK-32: store i32 %{{.+}}, i32* [[B_ADDR:%.+]],
|
||||
// CHECK-32: [[B_CVAL:%.+]] = load i[[SZ]], i[[SZ]]* [[B_ADDR]],
|
||||
|
||||
// We capture 2 VLA sizes in this target region
|
||||
// CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]]
|
||||
// CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2
|
||||
|
||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
|
||||
// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
|
||||
// CHECK: [[TRY]]
|
||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0)
|
||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX0:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX0]]
|
||||
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX0]]
|
||||
// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX1:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX1]]
|
||||
// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX1]]
|
||||
// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX2:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX2]]
|
||||
// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX2]]
|
||||
// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S]], i32 [[IDX3:[0-9]+]]
|
||||
// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP]], i32 [[IDX3]]
|
||||
// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P]], i32 [[IDX3]]
|
||||
|
||||
// The names below are not necessarily consistent with the names used for the
|
||||
// addresses above as some are repeated.
|
||||
// CHECK-DAG: [[BP0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
|
||||
// CHECK-DAG: [[P0:%[^,]+]] = inttoptr i[[SZ]] [[VLA0]] to i8*
|
||||
// CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* inttoptr (i[[SZ]] 2 to i8*), i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] {{4|8}}, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8*
|
||||
// CHECK-DAG: [[P2:%[^,]+]] = inttoptr i[[SZ]] [[B_CVAL]] to i8*
|
||||
// CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] 4, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] 8, i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK-DAG: [[BP4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
|
||||
// CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}}
|
||||
// CHECK-DAG: store i[[SZ]] [[CSIZE]], i[[SZ]]* {{%[^,]+}}
|
||||
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
|
||||
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
|
||||
//
|
||||
// CHECK: define {{.*}}[[FSTATIC]]
|
||||
//
|
||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
|
||||
// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
|
||||
// CHECK: [[IFTHEN]]
|
||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0)
|
||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||
|
||||
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 0
|
||||
// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
|
||||
// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
|
||||
// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8*
|
||||
// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8*
|
||||
|
||||
// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 1
|
||||
// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 1
|
||||
// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
|
||||
// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
|
||||
// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8*
|
||||
// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8*
|
||||
|
||||
// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 2
|
||||
// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 2
|
||||
// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
|
||||
// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
|
||||
|
||||
// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP]], i32 0, i32 3
|
||||
// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P]], i32 0, i32 3
|
||||
// CHECK-DAG: store i8* [[BP3:%[^,]+]], i8** [[BPADDR3]]
|
||||
// CHECK-DAG: store i8* [[P3:%[^,]+]], i8** [[PADDR3]]
|
||||
// CHECK-DAG: [[BP3]] = bitcast [10 x i32]* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P3]] = bitcast [10 x i32]* %{{.+}} to i8*
|
||||
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK-NEXT: br label %[[IFEND:.+]]
|
||||
|
||||
// CHECK: [[IFELSE]]
|
||||
// CHECK: store i32 -1, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: br label %[[IFEND:.+]]
|
||||
|
||||
// CHECK: [[IFEND]]
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
|
||||
//
|
||||
// CHECK: define {{.*}}[[FTEMPLATE]]
|
||||
//
|
||||
// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
|
||||
// CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
|
||||
// CHECK: [[IFTHEN]]
|
||||
// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0)
|
||||
// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
|
||||
|
||||
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0
|
||||
// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0
|
||||
// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
|
||||
// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
|
||||
// CHECK-DAG: [[BP0]] = inttoptr i[[SZ]] [[VAL0:%.+]] to i8*
|
||||
// CHECK-DAG: [[P0]] = inttoptr i[[SZ]] [[VAL0]] to i8*
|
||||
|
||||
// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1
|
||||
// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1
|
||||
// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
|
||||
// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
|
||||
// CHECK-DAG: [[BP1]] = inttoptr i[[SZ]] [[VAL1:%.+]] to i8*
|
||||
// CHECK-DAG: [[P1]] = inttoptr i[[SZ]] [[VAL1]] to i8*
|
||||
|
||||
// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2
|
||||
// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2
|
||||
// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
|
||||
// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
|
||||
// CHECK-DAG: [[BP2]] = bitcast [10 x i32]* %{{.+}} to i8*
|
||||
// CHECK-DAG: [[P2]] = bitcast [10 x i32]* %{{.+}} to i8*
|
||||
|
||||
// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4
|
||||
// CHECK-NEXT: br label %[[IFEND:.+]]
|
||||
|
||||
// CHECK: [[IFELSE]]
|
||||
// CHECK: store i32 -1, i32* [[RHV]], align 4
|
||||
// CHECK-NEXT: br label %[[IFEND:.+]]
|
||||
|
||||
// CHECK: [[IFEND]]
|
||||
// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
|
||||
// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
|
||||
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
|
||||
// CHECK: [[FAIL]]
|
||||
// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
|
||||
// CHECK-NEXT: br label %[[END]]
|
||||
// CHECK: [[END]]
|
||||
|
||||
|
||||
|
||||
// Check that the offloading functions are emitted and that the arguments are
|
||||
// correct and loaded correctly for the target regions of the callees of bar().
|
||||
|
||||
// CHECK: define internal void [[HVT7]]
|
||||
// Create local storage for each capture.
|
||||
// CHECK: [[LOCAL_THIS:%.+]] = alloca [[S1]]*
|
||||
// CHECK: [[LOCAL_B:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_VLA1:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_VLA2:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_C:%.+]] = alloca i16*
|
||||
// CHECK: [[LOCAL_B_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCAL_THIS]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_B:%.+]], i[[SZ]]* [[LOCAL_B]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_VLA1:%.+]], i[[SZ]]* [[LOCAL_VLA1]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_VLA2:%.+]], i[[SZ]]* [[LOCAL_VLA2]]
|
||||
// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCAL_C]]
|
||||
// Store captures in the context.
|
||||
// CHECK-DAG: [[REF_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCAL_THIS]],
|
||||
// CHECK-64-DAG:[[CONV_BP:%.+]] = bitcast i[[SZ]]* [[LOCAL_B]] to i32*
|
||||
// CHECK-DAG: [[VAL_VLA1:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA1]],
|
||||
// CHECK-DAG: [[VAL_VLA2:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_VLA2]],
|
||||
// CHECK-DAG: [[REF_C:%.+]] = load i16*, i16** [[LOCAL_C]],
|
||||
|
||||
// CHECK-64-DAG:[[CONV_B:%.+]] = load i32, i32* [[CONV_BP]]
|
||||
// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_B_CASTED]] to i32*
|
||||
// CHECK-64-DAG:store i32 [[CONV_B]], i32* [[CONV]], align
|
||||
// CHECK-32-DAG:[[LOCAL_BV:%.+]] = load i32, i32* [[LOCAL_B]]
|
||||
// CHECK-32-DAG:store i32 [[LOCAL_BV]], i32* [[LOCAL_B_CASTED]], align
|
||||
// CHECK-DAG: [[REF_B:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_B_CASTED]],
|
||||
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[S1]]*, i[[SZ]], i[[SZ]], i[[SZ]], i16*)* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), [[S1]]* [[REF_THIS]], i[[SZ]] [[REF_B]], i[[SZ]] [[VAL_VLA1]], i[[SZ]] [[VAL_VLA2]], i16* [[REF_C]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* %{{.+}})
|
||||
// To reduce complexity, we're only going as far as validating the signature of the outlined teams function.
|
||||
|
||||
|
||||
// CHECK: define internal void [[HVT6]]
|
||||
// Create local storage for each capture.
|
||||
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]*
|
||||
// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_AAA_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]]
|
||||
// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
|
||||
// Store captures in the context.
|
||||
// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
|
||||
// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
|
||||
// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8*
|
||||
// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
|
||||
|
||||
// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
|
||||
// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
|
||||
// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
|
||||
// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
|
||||
// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
|
||||
// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
|
||||
|
||||
// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
|
||||
// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
|
||||
// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align
|
||||
// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
|
||||
|
||||
// CHECK-DAG: [[CONV_AAA:%.+]] = load i8, i8* [[CONV_AAAP]]
|
||||
// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA_CASTED]] to i8*
|
||||
// CHECK-DAG: store i8 [[CONV_AAA]], i8* [[CONV]], align
|
||||
// CHECK-DAG: [[REF_AAA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AAA_CASTED]],
|
||||
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED6:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], i[[SZ]] [[REF_AAA]], [10 x i32]* [[REF_B]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
|
||||
// To reduce complexity, we're only going as far as validating the signature of the outlined teams function.
|
||||
|
||||
// CHECK: define internal void [[HVT5]]
|
||||
// Create local storage for each capture.
|
||||
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]*
|
||||
// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]]
|
||||
// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]]
|
||||
// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]]
|
||||
// Store captures in the context.
|
||||
// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32*
|
||||
// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16*
|
||||
// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]],
|
||||
|
||||
// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]]
|
||||
// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32*
|
||||
// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align
|
||||
// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]]
|
||||
// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align
|
||||
// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]],
|
||||
|
||||
// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]]
|
||||
// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16*
|
||||
// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align
|
||||
// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]],
|
||||
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]])
|
||||
//
|
||||
//
|
||||
// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}})
|
||||
// To reduce complexity, we're only going as far as validating the signature of the outlined teams function.
|
||||
|
||||
#endif
|
|
@ -0,0 +1,437 @@
|
|||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
|
||||
// Test target teams codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
|
||||
|
||||
// Check that no target code is emitted if no -fopenmp-targets flag was provided.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK-DAG: [[SA:%.+]] = type { [4 x i32] }
|
||||
// CHECK-DAG: [[SB:%.+]] = type { [8 x i32] }
|
||||
// CHECK-DAG: [[SC:%.+]] = type { [16 x i32] }
|
||||
// CHECK-DAG: [[SD:%.+]] = type { [32 x i32] }
|
||||
// CHECK-DAG: [[SE:%.+]] = type { [64 x i32] }
|
||||
// CHECK-DAG: [[ST1:%.+]] = type { [228 x i32] }
|
||||
// CHECK-DAG: [[ST2:%.+]] = type { [1128 x i32] }
|
||||
// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
|
||||
// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
|
||||
// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
|
||||
|
||||
// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
|
||||
|
||||
// CHECK-DAG: [[A1:@.+]] = internal global [[SA]]
|
||||
// CHECK-DAG: [[A2:@.+]] = global [[SA]]
|
||||
// CHECK-DAG: [[B1:@.+]] = global [[SB]]
|
||||
// CHECK-DAG: [[B2:@.+]] = global [[SB]]
|
||||
// CHECK-DAG: [[C1:@.+]] = internal global [[SC]]
|
||||
// CHECK-DAG: [[D1:@.+]] = global [[SD]]
|
||||
// CHECK-DAG: [[E1:@.+]] = global [[SE]]
|
||||
// CHECK-DAG: [[T1:@.+]] = global [[ST1]]
|
||||
// CHECK-DAG: [[T2:@.+]] = global [[ST2]]
|
||||
|
||||
// CHECK-NTARGET-DAG: [[SA:%.+]] = type { [4 x i32] }
|
||||
// CHECK-NTARGET-DAG: [[SB:%.+]] = type { [8 x i32] }
|
||||
// CHECK-NTARGET-DAG: [[SC:%.+]] = type { [16 x i32] }
|
||||
// CHECK-NTARGET-DAG: [[SD:%.+]] = type { [32 x i32] }
|
||||
// CHECK-NTARGET-DAG: [[SE:%.+]] = type { [64 x i32] }
|
||||
// CHECK-NTARGET-DAG: [[ST1:%.+]] = type { [228 x i32] }
|
||||
// CHECK-NTARGET-DAG: [[ST2:%.+]] = type { [1128 x i32] }
|
||||
// CHECK-NTARGET-NOT: type { i8*, i8*, %
|
||||
// CHECK-NTARGET-NOT: type { i32, %
|
||||
|
||||
// We have 12 target regions
|
||||
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// TCHECK-NOT: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
// CHECK-DAG: {{@.+}} = private constant i8 0
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i[[SZ]]] [i[[SZ]] 4]
|
||||
// CHECK-DAG: {{@.+}} = private unnamed_addr constant [1 x i32] [i32 288]
|
||||
|
||||
// CHECK-NTARGET-NOT: private constant i8 0
|
||||
// CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i
|
||||
|
||||
// CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
|
||||
// CHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
|
||||
// CHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* @{{.*}}, i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
|
||||
// TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY1:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR1]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY2:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR2]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY3:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR3]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY4:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR4]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY5:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR5]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY6:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR6]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY7:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR7]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY8:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR8]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY9:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR9]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY10:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR10]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY11:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR11]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
// TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00"
|
||||
// TCHECK-DAG: [[ENTRY12:@.+]] = constant [[ENTTY]] { i8* bitcast (void (i[[SZ]])* @{{.*}} to i8*), i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[NAMEPTR12]], i32 0, i32 0), i[[SZ]] 0, i32 0, i32 0 }, section ".omp_offloading.entries", align 1
|
||||
|
||||
// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
|
||||
// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
|
||||
// CHECK: [[DEVBEGIN:@.+]] = external constant i8
|
||||
// CHECK: [[DEVEND:@.+]] = external constant i8
|
||||
// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
|
||||
// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
|
||||
|
||||
// We have 4 initializers, one for the 500 priority, another one for 501, one more for the default priority, and the last one for the offloading registration function.
|
||||
// CHECK: @llvm.global_ctors = appending global [4 x { i32, void ()*, i8* }] [
|
||||
// CHECK-SAME: { i32, void ()*, i8* } { i32 500, void ()* [[P500:@[^,]+]], i8* null },
|
||||
// CHECK-SAME: { i32, void ()*, i8* } { i32 501, void ()* [[P501:@[^,]+]], i8* null },
|
||||
// CHECK-SAME: { i32, void ()*, i8* } { i32 65535, void ()* [[PMAX:@[^,]+]], i8* null },
|
||||
// CHECK-SAME: { i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
|
||||
|
||||
// CHECK-NTARGET: @llvm.global_ctors = appending global [3 x { i32, void ()*, i8* }] [
|
||||
|
||||
extern int *R;
|
||||
|
||||
struct SA { // Baseline type: none of its members contains an OpenMP directive.
|
||||
int arr[4];
|
||||
void foo() { // Adds 1 to *R through a local copy.
|
||||
int a = *R;
|
||||
a += 1;
|
||||
*R = a;
|
||||
}
|
||||
SA() { // Adds 2 to *R through a local copy.
|
||||
int a = *R;
|
||||
a += 2;
|
||||
*R = a;
|
||||
}
|
||||
~SA() { // Adds 3 to *R through a local copy.
|
||||
int a = *R;
|
||||
a += 3;
|
||||
*R = a;
|
||||
}
|
||||
};
|
||||
|
||||
struct SB { // Only foo() contains a 'target teams' region.
|
||||
int arr[8];
|
||||
void foo() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 4; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
SB() { // No OpenMP directive here: adds 5 to *R.
|
||||
int a = *R;
|
||||
a += 5;
|
||||
*R = a;
|
||||
}
|
||||
~SB() { // No OpenMP directive here: adds 6 to *R.
|
||||
int a = *R;
|
||||
a += 6;
|
||||
*R = a;
|
||||
}
|
||||
};
|
||||
|
||||
struct SC { // Only the constructor contains a 'target teams' region.
|
||||
int arr[16];
|
||||
void foo() { // No OpenMP directive here: adds 7 to *R.
|
||||
int a = *R;
|
||||
a += 7;
|
||||
*R = a;
|
||||
}
|
||||
SC() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 8; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
~SC() { // No OpenMP directive here: adds 9 to *R.
|
||||
int a = *R;
|
||||
a += 9;
|
||||
*R = a;
|
||||
}
|
||||
};
|
||||
|
||||
struct SD { // Only the destructor contains a 'target teams' region.
|
||||
int arr[32];
|
||||
void foo() { // No OpenMP directive here: adds 10 to *R.
|
||||
int a = *R;
|
||||
a += 10;
|
||||
*R = a;
|
||||
}
|
||||
SD() { // No OpenMP directive here: adds 11 to *R.
|
||||
int a = *R;
|
||||
a += 11;
|
||||
*R = a;
|
||||
}
|
||||
~SD() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 12; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
};
|
||||
|
||||
struct SE { // foo(), the constructor and the destructor each carry a 'target teams' directive.
|
||||
int arr[64];
|
||||
void foo() {
|
||||
int a = *R;
|
||||
#pragma omp target teams if(target: 0)
|
||||
a += 13; // Region whose 'if' clause for 'target' is the constant 0; the offload-info metadata lists in this file have no entry for SE::foo.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
SE() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 14; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
~SE() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 15; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
};
|
||||
|
||||
template <int x>
|
||||
struct ST { // Template variant: foo(), the constructor and the destructor each contain a 'target teams' region; this file instantiates ST<100> and ST<1000>.
|
||||
int arr[128 + x]; // Array length depends on the template argument.
|
||||
void foo() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 16 + x; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
ST() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 17 + x; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
~ST() {
|
||||
int a = *R;
|
||||
#pragma omp target teams
|
||||
a += 18 + x; // Statement associated with the 'target teams' directive above.
|
||||
*R = a; // Written back outside the region.
|
||||
}
|
||||
};
|
||||
|
||||
// We have to make sure we use all the target regions:
|
||||
//CHECK-DAG: define internal void @[[NAME1]](
|
||||
//CHECK-DAG: call void @[[NAME1]](
|
||||
//CHECK-DAG: define internal void @[[NAME2]](
|
||||
//CHECK-DAG: call void @[[NAME2]](
|
||||
//CHECK-DAG: define internal void @[[NAME3]](
|
||||
//CHECK-DAG: call void @[[NAME3]](
|
||||
//CHECK-DAG: define internal void @[[NAME4]](
|
||||
//CHECK-DAG: call void @[[NAME4]](
|
||||
//CHECK-DAG: define internal void @[[NAME5]](
|
||||
//CHECK-DAG: call void @[[NAME5]](
|
||||
//CHECK-DAG: define internal void @[[NAME6]](
|
||||
//CHECK-DAG: call void @[[NAME6]](
|
||||
//CHECK-DAG: define internal void @[[NAME7]](
|
||||
//CHECK-DAG: call void @[[NAME7]](
|
||||
//CHECK-DAG: define internal void @[[NAME8]](
|
||||
//CHECK-DAG: call void @[[NAME8]](
|
||||
//CHECK-DAG: define internal void @[[NAME9]](
|
||||
//CHECK-DAG: call void @[[NAME9]](
|
||||
//CHECK-DAG: define internal void @[[NAME10]](
|
||||
//CHECK-DAG: call void @[[NAME10]](
|
||||
//CHECK-DAG: define internal void @[[NAME11]](
|
||||
//CHECK-DAG: call void @[[NAME11]](
|
||||
//CHECK-DAG: define internal void @[[NAME12]](
|
||||
//CHECK-DAG: call void @[[NAME12]](
|
||||
|
||||
//TCHECK-DAG: define void @[[NAME1]](
|
||||
//TCHECK-DAG: define void @[[NAME2]](
|
||||
//TCHECK-DAG: define void @[[NAME3]](
|
||||
//TCHECK-DAG: define void @[[NAME4]](
|
||||
//TCHECK-DAG: define void @[[NAME5]](
|
||||
//TCHECK-DAG: define void @[[NAME6]](
|
||||
//TCHECK-DAG: define void @[[NAME7]](
|
||||
//TCHECK-DAG: define void @[[NAME8]](
|
||||
//TCHECK-DAG: define void @[[NAME9]](
|
||||
//TCHECK-DAG: define void @[[NAME10]](
|
||||
//TCHECK-DAG: define void @[[NAME11]](
|
||||
//TCHECK-DAG: define void @[[NAME12]](
|
||||
|
||||
// CHECK-NTARGET-NOT: __tgt_target
|
||||
// CHECK-NTARGET-NOT: __tgt_register_lib
|
||||
// CHECK-NTARGET-NOT: __tgt_unregister_lib
|
||||
|
||||
// TCHECK-NOT: __tgt_target
|
||||
// TCHECK-NOT: __tgt_register_lib
|
||||
// TCHECK-NOT: __tgt_unregister_lib
|
||||
|
||||
// We have 2 initializers with priority 500
|
||||
//CHECK: define internal void [[P500]](
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK-NOT: call void @{{.+}}()
|
||||
//CHECK: ret void
|
||||
|
||||
// We have 1 initializer with priority 501
|
||||
//CHECK: define internal void [[P501]](
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK-NOT: call void @{{.+}}()
|
||||
//CHECK: ret void
|
||||
|
||||
// We have 6 initializers with default priority
|
||||
//CHECK: define internal void [[PMAX]](
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK: call void @{{.+}}()
|
||||
//CHECK-NOT: call void @{{.+}}()
|
||||
//CHECK: ret void
|
||||
|
||||
// Check registration and unregistration
|
||||
|
||||
//CHECK: define internal void [[UNREGFN:@.+]](i8*)
|
||||
//CHECK: call i32 @__tgt_unregister_lib([[DSCTY]]* [[DESC]])
|
||||
//CHECK: ret void
|
||||
//CHECK: declare i32 @__tgt_unregister_lib([[DSCTY]]*)
|
||||
|
||||
//CHECK: define internal void [[REGFN]](i8*)
|
||||
//CHECK: call i32 @__tgt_register_lib([[DSCTY]]* [[DESC]])
|
||||
//CHECK: call i32 @__cxa_atexit(void (i8*)* [[UNREGFN]], i8* bitcast ([[DSCTY]]* [[DESC]] to i8*),
|
||||
//CHECK: ret void
|
||||
//CHECK: declare i32 @__tgt_register_lib([[DSCTY]]*)
|
||||
|
||||
static __attribute__((init_priority(500))) SA a1; // Internal linkage, init priority 500.
|
||||
SA a2; // No init_priority attribute.
|
||||
SB __attribute__((init_priority(500))) b1; // Init priority 500.
|
||||
SB __attribute__((init_priority(501))) b2; // Init priority 501.
|
||||
static SC c1; // Internal linkage, no init_priority attribute.
|
||||
SD d1; // No init_priority attribute.
|
||||
SE e1; // No init_priority attribute.
|
||||
ST<100> t1; // Instantiates ST with x = 100.
|
||||
ST<1000> t2; // Instantiates ST with x = 1000.
|
||||
|
||||
|
||||
int bar(int a){ // Calls foo() on every global object above, then runs one more 'target teams' region on a local.
|
||||
int r = a;
|
||||
|
||||
a1.foo();
|
||||
a2.foo();
|
||||
b1.foo();
|
||||
b2.foo();
|
||||
c1.foo();
|
||||
d1.foo();
|
||||
e1.foo();
|
||||
t1.foo();
|
||||
t2.foo();
|
||||
|
||||
#pragma omp target teams
|
||||
++r; // Statement associated with the 'target teams' directive above.
|
||||
|
||||
return r + *R; // Combines the local result with the value behind the global pointer R.
|
||||
}
|
||||
|
||||
// Check metadata is properly generated:
|
||||
// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 193, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 243, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 259, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 265, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 276, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 282, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 402, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 288, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 282, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 288, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 276, i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 218, i32 {{[0-9]+}}}
|
||||
|
||||
// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 193, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 243, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 259, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 265, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 276, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 282, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 402, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 288, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 282, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 288, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 276, i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 218, i32 {{[0-9]+}}}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,66 @@
|
|||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
|
||||
// Test target teams codegen - host bc file has to be created first.
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// CHECK: [[CA:%.+]] = type { i32* }
|
||||
|
||||
// CHECK: define {{.*}}i32 @[[NNAME:.+]](i32 {{.*}}%{{.+}})
|
||||
int nested(int a){ // Increments 'a' in a top-level 'target teams' region and again in one nested inside a lambda's 'parallel' region.
|
||||
// CHECK: call void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME]]_l[[T1L:[0-9]+]](
|
||||
#pragma omp target teams
|
||||
++a; // Statement associated with the first 'target teams' directive.
|
||||
|
||||
// CHECK: call void @"[[LNAME:.+]]"([[CA]]*
|
||||
auto F = [&](){ // Lambda capturing by reference, so it updates the enclosing function's 'a'.
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp target teams
|
||||
++a; // Second 'target teams' region, enclosed by the 'parallel' region.
|
||||
}
|
||||
};
|
||||
|
||||
F(); // Invoke the lambda once.
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T1L]](
|
||||
// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID:[0-9a-f]+_[0-9a-f]+]]_[[NNAME:.+]]_l[[T1L:[0-9]+]](
|
||||
|
||||
// CHECK: define {{.*}}void @"[[LNAME]]"(
|
||||
// CHECK: call void {{.*}}@__kmpc_fork_call{{.+}}[[PNAME:@.+]] to
|
||||
|
||||
// CHECK: define {{.*}}void [[PNAME]](
|
||||
// CHECK: call void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L:[0-9]+]](
|
||||
|
||||
// CHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME]]_l[[T2L]](
|
||||
// TCHECK: define {{.*}}void @__omp_offloading_[[FILEID]]_[[NNAME:.+]]_l[[T2L:[0-9]+]](
|
||||
|
||||
|
||||
// Check metadata is properly generated:
|
||||
// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
|
||||
// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
|
||||
|
||||
// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}}
|
||||
// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}}
|
||||
#endif
|
Loading…
Reference in New Issue