forked from OSchip/llvm-project
[OpenMP] Initial implementation of code generation for pragma 'teams distribute parallel for simd' on host
https://reviews.llvm.org/D40795 This includes regression tests for all associated clauses. llvm-svn: 319696
This commit is contained in:
parent
e117129ef7
commit
56a2aa4ddc
|
@ -902,6 +902,7 @@ void clang::getOpenMPCaptureRegions(
|
|||
CaptureRegions.push_back(OMPD_target);
|
||||
break;
|
||||
case OMPD_teams_distribute_parallel_for:
|
||||
case OMPD_teams_distribute_parallel_for_simd:
|
||||
CaptureRegions.push_back(OMPD_teams);
|
||||
CaptureRegions.push_back(OMPD_parallel);
|
||||
break;
|
||||
|
@ -935,7 +936,6 @@ void clang::getOpenMPCaptureRegions(
|
|||
case OMPD_atomic:
|
||||
case OMPD_target_data:
|
||||
case OMPD_distribute_simd:
|
||||
case OMPD_teams_distribute_parallel_for_simd:
|
||||
case OMPD_target_teams_distribute:
|
||||
case OMPD_target_teams_distribute_parallel_for:
|
||||
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||
|
|
|
@ -2100,18 +2100,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
|
|||
});
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
|
||||
const OMPTeamsDistributeParallelForSimdDirective &S) {
|
||||
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
|
||||
CGM.getOpenMPRuntime().emitInlinedDirective(
|
||||
*this, OMPD_teams_distribute_parallel_for_simd,
|
||||
[&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
OMPLoopScope PreInitScope(CGF, S);
|
||||
CGF.EmitStmt(
|
||||
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
|
||||
});
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
|
||||
const OMPTargetTeamsDistributeDirective &S) {
|
||||
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
|
||||
|
@ -3953,6 +3941,28 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
|
|||
[](CodeGenFunction &) { return nullptr; });
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
|
||||
const OMPTeamsDistributeParallelForSimdDirective &S) {
|
||||
auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
|
||||
CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
|
||||
S.getDistInc());
|
||||
};
|
||||
|
||||
// Emit teams region as a standalone region.
|
||||
auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
OMPPrivateScope PrivateScope(CGF);
|
||||
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
|
||||
(void)PrivateScope.Privatize();
|
||||
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
|
||||
CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
|
||||
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
|
||||
};
|
||||
emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
|
||||
emitPostUpdateForReductionClause(*this, S,
|
||||
[](CodeGenFunction &) { return nullptr; });
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPCancellationPointDirective(
|
||||
const OMPCancellationPointDirective &S) {
|
||||
CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
|
||||
|
|
|
@ -2201,7 +2201,6 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
case OMPD_distribute_simd:
|
||||
case OMPD_distribute_parallel_for:
|
||||
case OMPD_teams_distribute_simd:
|
||||
case OMPD_teams_distribute_parallel_for_simd:
|
||||
case OMPD_target_teams_distribute:
|
||||
case OMPD_target_teams_distribute_parallel_for:
|
||||
case OMPD_target_teams_distribute_parallel_for_simd:
|
||||
|
@ -2220,7 +2219,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
|
|||
Params);
|
||||
break;
|
||||
}
|
||||
case OMPD_teams_distribute_parallel_for: {
|
||||
case OMPD_teams_distribute_parallel_for:
|
||||
case OMPD_teams_distribute_parallel_for_simd: {
|
||||
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
|
||||
QualType KmpInt32PtrTy =
|
||||
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
|
||||
|
@ -7155,12 +7155,24 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForSimdDirective(
|
|||
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||
CS->getCapturedDecl()->setNothrow();
|
||||
|
||||
for (int ThisCaptureLevel =
|
||||
getOpenMPCaptureLevels(OMPD_teams_distribute_parallel_for_simd);
|
||||
ThisCaptureLevel > 1; --ThisCaptureLevel) {
|
||||
CS = cast<CapturedStmt>(CS->getCapturedStmt());
|
||||
// 1.2.2 OpenMP Language Terminology
|
||||
// Structured block - An executable statement with a single entry at the
|
||||
// top and a single exit at the bottom.
|
||||
// The point of exit cannot be a branch out of the structured block.
|
||||
// longjmp() and throw() must not violate the entry/exit criteria.
|
||||
CS->getCapturedDecl()->setNothrow();
|
||||
}
|
||||
|
||||
OMPLoopDirective::HelperExprs B;
|
||||
// In presence of clause 'collapse' with number of loops, it will
|
||||
// define the nested loops number.
|
||||
auto NestedLoopCount = CheckOpenMPLoop(
|
||||
OMPD_teams_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses),
|
||||
nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
|
||||
nullptr /*ordered not a clause on distribute*/, CS, *this, *DSAStack,
|
||||
VarsWithImplicitDSA, B);
|
||||
|
||||
if (NestedLoopCount == 0)
|
||||
|
|
|
@ -24,7 +24,6 @@ int teams_argument_global(int n){
|
|||
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
|
||||
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
|
||||
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
|
||||
|
||||
// CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
|
||||
|
|
|
@ -0,0 +1,263 @@
|
|||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
#ifdef CK1
|
||||
|
||||
int a[100];
|
||||
|
||||
// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
|
||||
int teams_argument_global(int n){
|
||||
int te = n / 128;
|
||||
int th = 128;
|
||||
// discard n_addr
|
||||
// CK1: alloca i32,
|
||||
// CK1: [[TE:%.+]] = alloca i32,
|
||||
// CK1: [[TH:%.+]] = alloca i32,
|
||||
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
|
||||
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
|
||||
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
|
||||
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
|
||||
// CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]],
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd num_teams(te), thread_limit(th) simdlen(64)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
|
||||
int i;
|
||||
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CK1: call void @[[OFFL2:.+]](
|
||||
#pragma omp target
|
||||
{{{
|
||||
#pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
|
||||
for(i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
}}}
|
||||
// outlined target regions
|
||||
// CK1: define internal void @[[OFFL1]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], i{{32|64}} {{.+}}, {{.+}})
|
||||
// CK1: [[TE_ADDR:%.+]] = alloca i{{32|64}},
|
||||
// CK1: [[TH_ADDR:%.+]] = alloca i{{32|64}},
|
||||
// CK1: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
|
||||
// CK1: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
|
||||
// CK1-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
|
||||
// CK1-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
|
||||
// CK1-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
|
||||
// CK1-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
|
||||
// CK1-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
|
||||
// CK1-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
|
||||
// CK1: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.+}} @__kmpc_fork_call(
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OFFL2]]({{.+}}, {{.+}})
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}}, {{.+}}, {{.+}})
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL2]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.+}} @__kmpc_fork_call(
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
return a[0];
|
||||
}
|
||||
|
||||
// CK1-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 4}
|
||||
// CK1-DAG: !{!"llvm.loop.vectorize.width", i32 64}
|
||||
|
||||
#endif // CK1
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
#ifdef CK2
|
||||
|
||||
// CK2: define {{.*}}i32 @{{.+}}teams_local_argv(
|
||||
int teams_local_arg(void) {
|
||||
int n = 100;
|
||||
int a[n], i;
|
||||
|
||||
// CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CK2: call void @[[OFFL1:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
|
||||
for(i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
|
||||
// outlined target region
|
||||
// CK2: define internal void @[[OFFL1]]({{.+}}, {{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.+}} @__kmpc_fork_call(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
return a[0];
|
||||
}
|
||||
|
||||
// CK2-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
// CK2-DAG: !{!"llvm.loop.vectorize.width", i32 4}
|
||||
|
||||
#endif // CK2
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
|
||||
// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
|
||||
#ifdef CK3
|
||||
|
||||
// CK3: [[SSI:%.+]] = type { [{{.+}} x i32], float }
|
||||
|
||||
template <typename T, int X, long long Y>
|
||||
struct SS{
|
||||
T a[X];
|
||||
float b;
|
||||
// CK3: define {{.*}}i32 @{{.+}}foo{{.+}}(
|
||||
int foo(void) {
|
||||
int i;
|
||||
// CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
|
||||
for(i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
|
||||
// outlined target region
|
||||
// CK3: define internal void @[[OFFL1]]([[SSI]]* {{.+}})
|
||||
// CK3: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 2, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
|
||||
// CK3: ret void
|
||||
|
||||
// CK3: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK3: call void @__kmpc_for_static_init_4(
|
||||
// CK3: call void {{.+}} @__kmpc_fork_call(
|
||||
// CK3: call void @__kmpc_for_static_fini(
|
||||
// CK3: ret void
|
||||
|
||||
return a[0];
|
||||
}
|
||||
};
|
||||
|
||||
int teams_template_struct(void) {
|
||||
SS<int, 123, 456> V;
|
||||
return V.foo();
|
||||
|
||||
}
|
||||
|
||||
// CK3-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
// CK3-DAG: !{!"llvm.loop.vectorize.width", i32 4}
|
||||
#endif // CK3
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-64
|
||||
// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK4 --check-prefix CK4-32
|
||||
|
||||
#ifdef CK4
|
||||
|
||||
template <typename T, int n>
|
||||
int tmain(T argc) {
|
||||
T a[n];
|
||||
int te = n/128;
|
||||
int th = 128;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd num_teams(te) thread_limit(th) simdlen(64)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
int n = 100;
|
||||
int a[n], i;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd safelen(4) aligned(a) linear(i)
|
||||
for(i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
return tmain<int, 10>(argc);
|
||||
}
|
||||
|
||||
// CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
|
||||
// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CK4: call void @[[OFFL1:.+]]({{.+}})
|
||||
// CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
|
||||
// CK4: ret
|
||||
|
||||
// CK4: define {{.*}}void @[[OFFL1]]({{.+}})
|
||||
// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}}, {{.+}})
|
||||
// CK4: ret void
|
||||
|
||||
// CK4: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK4: call void @__kmpc_for_static_init_4(
|
||||
// CK4: call void {{.+}} @__kmpc_fork_call(
|
||||
// CK4: call void @__kmpc_for_static_fini(
|
||||
// CK4: ret void
|
||||
|
||||
// CK4: define {{.*}}i32 @[[TMAIN]]({{.+}})
|
||||
// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CK4: call void @[[OFFLT:.+]]({{.+}})
|
||||
// CK4: ret
|
||||
// CK4-NEXT: }
|
||||
|
||||
// CK4: define {{.*}}void @[[OFFLT]](i{{32|64}} [[TE_ARG:%.+]], i{{32|64}} [[TH_ARG:%.+]], {{.+}})
|
||||
// CK4: [[TE_ADDR:%.+]] = alloca i{{32|64}},
|
||||
// CK4: [[TH_ADDR:%.+]] = alloca i{{32|64}},
|
||||
// CK4: store{{.+}} [[TE_ARG]], {{.+}} [[TE_ADDR]],
|
||||
// CK4: store{{.+}} [[TH_ARG]], {{.+}} [[TH_ADDR]],
|
||||
// CK4-64: [[TE_CONV:%.+]] = bitcast{{.+}} [[TE_ADDR]] to
|
||||
// CK4-64: [[TH_CONV:%.+]] = bitcast{{.+}} [[TH_ADDR]] to
|
||||
// CK4-64: [[TE_VAL:%.+]] = load i32, i32* [[TE_CONV]],
|
||||
// CK4-64: [[TH_VAL:%.+]] = load i32, i32* [[TH_CONV]],
|
||||
// CK4-32: [[TE_VAL:%.+]] = load i32, i32* [[TE_ADDR]],
|
||||
// CK4-32: [[TH_VAL:%.+]] = load i32, i32* [[TH_ADDR]],
|
||||
// CK4: {{%.+}} = call i32 @__kmpc_push_num_teams({{.+}}, {{.+}}, i32 [[TE_VAL]], i32 [[TH_VAL]])
|
||||
// CK4: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT:.+]] to {{.+}}, {{.+}}, {{.+}})
|
||||
// CK4: ret void
|
||||
|
||||
// CK4: define internal void @[[OUTLT]]({{.+}})
|
||||
// CK4: call void @__kmpc_for_static_init_4(
|
||||
// CK4: call void {{.+}} @__kmpc_fork_call(
|
||||
// CK4: call void @__kmpc_for_static_fini(
|
||||
// CK4: ret void
|
||||
|
||||
// CK4-DAG: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
// CK4-DAG: !{!"llvm.loop.vectorize.width", i32 4}
|
||||
// CK4-DAG: !{!"llvm.loop.vectorize.width", i32 64}
|
||||
|
||||
#endif // CK4
|
||||
#endif
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
#ifdef CK1
|
||||
|
||||
template <typename T, int X, long long Y>
|
||||
struct SS{
|
||||
T a[X][Y];
|
||||
|
||||
// CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
|
||||
int foo(void) {
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL1:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd collapse(2)
|
||||
for(int i = 0; i < X; i++) {
|
||||
for(int j = 0; j < Y; j++) {
|
||||
a[i][j] = (T)0;
|
||||
}
|
||||
}
|
||||
// CK1: define internal void @[[OFFL1]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL1]]({{.+}})
|
||||
// discard loop variables not needed here
|
||||
// CK1: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
|
||||
// CK1: store i32 56087, i32* [[OMP_UB]],
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
return a[0][0];
|
||||
}
|
||||
};
|
||||
|
||||
int teams_template_struct(void) {
|
||||
SS<int, 123, 456> V;
|
||||
return V.foo();
|
||||
|
||||
}
|
||||
|
||||
// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif // CK1
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
#ifdef CK2
|
||||
|
||||
template <typename T, int n, int m>
|
||||
int tmain(T argc) {
|
||||
T a[n][m];
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd collapse(2)
|
||||
for(int i = 0; i < n; i++) {
|
||||
for(int j = 0; j < m; j++) {
|
||||
a[i][j] = (T)0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
int n = 100;
|
||||
int m = 2;
|
||||
int a[n][m];
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd collapse(2)
|
||||
for(int i = 0; i < n; i++) {
|
||||
for(int j = 0; j < m; j++) {
|
||||
a[i][j] = 0;
|
||||
}
|
||||
}
|
||||
return tmain<int, 10, 2>(argc);
|
||||
}
|
||||
|
||||
// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL1:.+]]({{.+}})
|
||||
// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
|
||||
// CK2: ret
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL1]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i64,
|
||||
// CK2: store i64 {{.+}}, i64* [[OMP_UB]],
|
||||
// CK2: call void @__kmpc_for_static_init_8({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i64* [[OMP_UB]],
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_{{[4|8]}}({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
|
||||
// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT1:.+]]({{.+}})
|
||||
// CK2: ret
|
||||
// CK2-NEXT: }
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT1]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT1]]({{.+}})
|
||||
// discard loop variables not needed here
|
||||
// CK2: [[OMP_UB:%.omp.comb.ub]] = alloca i32,
|
||||
// CK2: store i32 {{.+}}, i32* [[OMP_UB]],
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, {{.+}}, {{.+}}, i32* [[OMP_UB]],
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[TPAR_OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34, {{.+}}, {{.+}},
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK4: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif // CK2
|
||||
#endif // #ifndef HEADER
|
|
@ -0,0 +1,268 @@
|
|||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
#ifdef CK1
|
||||
|
||||
template <typename T, int X, long long Y>
|
||||
struct SS{
|
||||
T a[X];
|
||||
float b;
|
||||
// CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
|
||||
int foo(void) {
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL1:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL2:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static)
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL3:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static, X/2)
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
// CK1: define internal void @[[OFFL1]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OFFL2]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL2]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL2]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
|
||||
// CK1: define internal void @[[OFFL3]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL3]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL3]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
return a[0];
|
||||
}
|
||||
};
|
||||
|
||||
int teams_template_struct(void) {
|
||||
SS<int, 123, 456> V;
|
||||
return V.foo();
|
||||
|
||||
}
|
||||
|
||||
// CK1: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif // CK1
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
#ifdef CK2
|
||||
|
||||
template <typename T, int n>
|
||||
int tmain(T argc) {
|
||||
T a[n];
|
||||
int m = 10;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static, m)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
int n = 100;
|
||||
int a[n];
|
||||
int m = 10;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static, m)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
return tmain<int, 10>(argc);
|
||||
}
|
||||
|
||||
// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL1:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL2:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL3:.+]]({{.+}})
|
||||
// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
|
||||
// CK2: ret
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL1]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL3]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT1:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT2:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT3:.+]]({{.+}})
|
||||
// CK2: ret
|
||||
// CK2-NEXT: }
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT1]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT2]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT3]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif // CK2
|
||||
#endif // #ifndef HEADER
|
|
@ -0,0 +1,500 @@
|
|||
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
|
||||
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
struct St {
|
||||
int a, b;
|
||||
St() : a(0), b(0) {}
|
||||
St(const St &st) : a(st.a + st.b), b(0) {}
|
||||
~St() {}
|
||||
};
|
||||
|
||||
volatile int g = 1212;
|
||||
volatile int &g1 = g;
|
||||
|
||||
template <class T>
|
||||
struct S {
|
||||
T f;
|
||||
S(T a) : f(a + g) {}
|
||||
S() : f(g) {}
|
||||
S(const S &s, St t = St()) : f(s.f + t.a) {}
|
||||
operator T() { return T(); }
|
||||
~S() {}
|
||||
};
|
||||
|
||||
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
|
||||
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
|
||||
// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
|
||||
|
||||
template <typename T>
|
||||
T tmain() {
|
||||
S<T> test;
|
||||
T t_var = T();
|
||||
T vec[] = {1, 2};
|
||||
S<T> s_arr[] = {1, 2};
|
||||
S<T> &var = test;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
vec[i] = t_var;
|
||||
s_arr[i] = var;
|
||||
}
|
||||
return T();
|
||||
}
|
||||
|
||||
// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
|
||||
S<float> test;
|
||||
// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
|
||||
int t_var = 333;
|
||||
// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
|
||||
int vec[] = {1, 2};
|
||||
// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
|
||||
S<float> s_arr[] = {1, 2};
|
||||
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
|
||||
S<float> var(3);
|
||||
// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
|
||||
|
||||
int main() {
|
||||
static int sivar;
|
||||
#ifdef LAMBDA
|
||||
// LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
|
||||
// LAMBDA-LABEL: @main
|
||||
// LAMBDA: call void [[OUTER_LAMBDA:@.+]](
|
||||
[&]() {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
|
||||
// LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}})
|
||||
// LAMBDA: ret
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd firstprivate(g, g1, sivar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
|
||||
// LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{%.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G_CAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_CAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA-DAG: [[G_CAST_VAL:%.+]] = load{{.+}} [[G_CAST]],
|
||||
// LAMBDA-DAG: [[G1_CAST_VAL:%.+]] = load{{.+}} [[G1_CAST]],
|
||||
// LAMBDA-DAG: [[SIVAR_CAST_VAL:%.+]] = load{{.+}} [[SIVAR_CAST]],
|
||||
// LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[G_CAST_VAL]], {{.+}} [[G1_CAST_VAL]], {{.+}} [[SIVAR_CAST_VAL]])
|
||||
// LAMBDA: ret void
|
||||
|
||||
// LAMBDA: define internal void @[[LOUTL1]]({{.+}})
|
||||
// Skip global and bound tid vars
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
|
||||
// skip loop vars
|
||||
// LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
|
||||
// LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
|
||||
// LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
|
||||
// LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
|
||||
// LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
|
||||
// LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
|
||||
// LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
|
||||
g = 1;
|
||||
g1 = 1;
|
||||
sivar = 2;
|
||||
// LAMBDA: call void @__kmpc_for_static_init_4(
|
||||
// LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
|
||||
// LAMBDA: call void @__kmpc_for_static_fini(
|
||||
// LAMBDA: ret void
|
||||
|
||||
// LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
|
||||
// Skip global and bound tid vars, and prev lb and ub vars
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_TMP:%.+]] = alloca i32*,
|
||||
// skip loop vars
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_TMP_PRIV:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA-DAG: store {{.+}}, {{.+}} [[G_ADDR]],
|
||||
// LAMBDA-DAG: store {{.+}}, {{.+}} [[G1_ADDR]],
|
||||
// LAMBDA-DAG: store {{.+}}, {{.+}} [[SIVAR_ADDR]],
|
||||
// LAMBDA-DAG: [[G_CONV:%.+]] = bitcast {{.+}} [[G_ADDR]] to
|
||||
// LAMBDA-DAG: [[G1_CONV:%.+]] = bitcast {{.+}} [[G1_ADDR]] to
|
||||
// LAMBDA-DAG: [[SIVAR_CONV:%.+]] = bitcast {{.+}} [[SIVAR_ADDR]] to
|
||||
// LAMBDA-DAG: store{{.+}} [[G1_CONV]], {{.+}} [[G1_TMP]],
|
||||
|
||||
// use of private vars
|
||||
// LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
|
||||
// LAMBDA-DAG: [[G1:%.+]] = load{{.+}}, {{.+}}* [[G1_TMP_PRIV]]
|
||||
// LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1]],
|
||||
// LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[G1_TMP]],
|
||||
// LAMBDA: call void [[INNER_LAMBDA:@.+]](
|
||||
// LAMBDA: call void @__kmpc_for_static_fini(
|
||||
// LAMBDA: ret void
|
||||
[&]() {
|
||||
// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
|
||||
// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
|
||||
g = 2;
|
||||
g1 = 2;
|
||||
sivar = 4;
|
||||
// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
|
||||
|
||||
// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
|
||||
// LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
||||
// LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
|
||||
// LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
||||
// LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
|
||||
}();
|
||||
}
|
||||
}();
|
||||
return 0;
|
||||
|
||||
// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#else
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd firstprivate(t_var, vec, s_arr, var, sivar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
vec[i] = t_var;
|
||||
s_arr[i] = var;
|
||||
sivar += i;
|
||||
}
|
||||
return tmain<int>();
|
||||
#endif
|
||||
}
|
||||
|
||||
// CHECK: define {{.*}}i{{[0-9]+}} @main()
|
||||
// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
|
||||
// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
|
||||
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[SIVAR_CAST:%.+]] = alloca i{{[0-9]+}},
|
||||
|
||||
// CHECK-DAG: [[VEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_PRIV]],
|
||||
// CHECK-DAG: [[T_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_CAST]],
|
||||
// CHECK-DAG: [[S_ARR_TE_PAR:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[VAR_TE_PAR:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_PRIV]],
|
||||
// CHECK-DAG: [[SIVAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_CAST]],
|
||||
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}} @[[OUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[VEC_TE_PAR]], i{{[0-9]+}} [[T_VAR_TE_PAR]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_TE_PAR]], [[S_FLOAT_TY]]* [[VAR_TE_PAR]], i{{[0-9]+}} [[SIVAR_TE_PAR]])
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[OUTL1]]({{.+}})
|
||||
// Skip global and bound tid vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// Skip temp vars for loop
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
|
||||
// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
|
||||
|
||||
// param copy
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
|
||||
// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
|
||||
|
||||
// T_VAR and SIVAR
|
||||
// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
|
||||
// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
|
||||
|
||||
// preparation vars
|
||||
// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
|
||||
|
||||
// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
|
||||
// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
|
||||
// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
|
||||
|
||||
// firstprivate(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
|
||||
// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
|
||||
// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
|
||||
// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
|
||||
|
||||
// firstprivate(var)
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[PAR_OUTL]]({{.+}})
|
||||
// Skip global and bound tid vars, and prev lb ub vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// Skip temp vars for loop
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
|
||||
// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
|
||||
|
||||
// param copy
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
|
||||
// CHECK: store [2 x [[S_FLOAT_TY]]]* {{.+}}, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: store [[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]** [[VAR_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[SIVAR_ADDR]],
|
||||
|
||||
// T_VAR and SIVAR
|
||||
// CHECK-DAG-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
|
||||
// CHECK-DAG-64: [[CONV_SIVAR:%.+]] = bitcast i64* [[SIVAR_ADDR]] to i32*
|
||||
|
||||
// preparation vars
|
||||
// CHECK-DAG: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK-DAG: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
|
||||
|
||||
// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
|
||||
// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
|
||||
// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
|
||||
|
||||
// firstprivate(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]] to
|
||||
// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
|
||||
// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}}], [ [[S_ARR_DST:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
|
||||
// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
|
||||
|
||||
// firstprivate(var)
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
|
||||
// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
|
||||
// CHECK-DAG-32: {{.+}} = {{.+}} [[SIVAR_ADDR]]
|
||||
// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_SIVAR]]
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
|
||||
// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}})
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define {{.*}}void @[[TOFFL1]]({{.+}})
|
||||
// CHECK: [[TT_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[TVEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[TS_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]]*,
|
||||
// CHECK: [[TVAR_PRIV:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// CHECK: [[TT_VAR_CAST:%.+]] = alloca i{{[0-9]+}},
|
||||
|
||||
// CHECK-DAG: [[TVEC_TE_PAR:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[TVEC_PRIV]],
|
||||
// CHECK-DAG: [[TT_VAR_TE_PAR:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[TT_VAR_CAST]],
|
||||
// CHECK-DAG: [[TS_ARR_TE_PAR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[TS_ARR_PRIV]],
|
||||
// CHECK-DAG: [[TVAR_TE_PAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TVAR_PRIV]],
|
||||
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[TOUTL1:.+]] to {{.+}}, [2 x i{{[0-9]+}}]* [[TVEC_TE_PAR]], i{{[0-9]+}} [[TT_VAR_TE_PAR]], [2 x [[S_INT_TY]]]* [[TS_ARR_TE_PAR]], [[S_INT_TY]]* [[TVAR_TE_PAR]])
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[TOUTL1]]({{.+}})
|
||||
// Skip global and bound tid vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// Skip temp vars for loop
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
|
||||
// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
|
||||
// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
|
||||
|
||||
// param copy
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
|
||||
// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
|
||||
|
||||
// T_VAR and preparation variables
|
||||
// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
|
||||
// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
|
||||
|
||||
// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
|
||||
// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
|
||||
// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
|
||||
|
||||
// firstprivate(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
|
||||
// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
|
||||
// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
|
||||
// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
|
||||
|
||||
// firstprivate(var)
|
||||
// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[TPAR_OUTL]]({{.+}})
|
||||
// Skip global and bound tid vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// Skip temp vars for loop
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
|
||||
// CHECK: [[AGG_TMP1:%.+]] = alloca [[ST_TY]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: [[AGG_TMP2:%.+]] = alloca [[ST_TY]],
|
||||
// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
|
||||
|
||||
// param copy
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* {{.+}}, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[T_VAR_ADDR]],
|
||||
// CHECK: store [2 x [[S_INT_TY]]]* {{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: store [[S_INT_TY]]* {{.+}}, [[S_INT_TY]]** [[VAR_ADDR]],
|
||||
|
||||
// T_VAR and preparation variables
|
||||
// CHECK: [[VEC_ADDR_VAL:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK-64: [[CONV_TVAR:%.+]] = bitcast i64* [[T_VAR_ADDR]] to i32*
|
||||
// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]],
|
||||
|
||||
// firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2
|
||||
// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
|
||||
// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VEC_DEST_PRIV]], i8* [[VEC_SRC]], {{.+}})
|
||||
|
||||
// firstprivate(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_ADDR_BGN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]] to
|
||||
// CHECK-DAG: [[S_ARR_FIN:%.+]] = icmp{{.+}} [[S_ARR_PRIV_BGN]],
|
||||
// CHECK-DAG: [[S_ARR_SRC_COPY:%.+]] = phi{{.+}} [ [[S_ARR_ADDR_BGN]], {{.+}} ], [ [[S_ARR_SRC:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: [[S_ARR_DST_COPY:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_DST:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_DST_COPY]], {{.+}} [[S_ARR_SRC_COPY]], {{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP1]])
|
||||
// CHECK-DAG: [[S_ARR_DST]] = getelementptr {{.+}} [[S_ARR_DST_COPY]],
|
||||
// CHECK-DAG: [[S_ARR_SRC]] = getelementptr {{.+}} [[S_ARR_SRC_COPY]],
|
||||
|
||||
// firstprivate(var)
|
||||
// CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]],
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]], {{.+}} [[VAR_ADDR_REF]], {{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[AGG_TMP2]])
|
||||
// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]** [[TMP]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK-DAG-32: {{.+}} = {{.+}} [[T_VAR_ADDR]]
|
||||
// CHECK-DAG-64: {{.+}} = {{.+}} [[CONV_TVAR]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,187 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
void fn1();
|
||||
void fn2();
|
||||
void fn3();
|
||||
void fn4();
|
||||
void fn5();
|
||||
void fn6();
|
||||
|
||||
int Arg;
|
||||
|
||||
// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
|
||||
void gtid_test() {
|
||||
#pragma omp target
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0 ; i < 100; i++) {}
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_0]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_0:@.+]] to void
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// CHECK: define{{.+}} void [[OMP_OUTLINED_0]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (parallel: false)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_1]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_serialized_parallel(
|
||||
// CHECK: call void [[OMP_OUTLINED_1:@.+]](
|
||||
// CHECK: call void @__kmpc_end_serialized_parallel(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: define{{.+}} void [[OMP_OUTLINED_1]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @{{.+}}gtid_test
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret
|
||||
gtid_test();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
int tmain(T Arg) {
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (true)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
fn1();
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (false)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
fn2();
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (parallel: Arg)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
fn3();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
|
||||
int main() {
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
|
||||
// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (true)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_0]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_2:@.+]] to void
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: define{{.+}} void [[OMP_OUTLINED_2]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @{{.+}}fn4
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
fn4();
|
||||
}
|
||||
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (false)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_1]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void @__kmpc_serialized_parallel(
|
||||
// CHECK: call void [[OMP_OUTLINED_3:@.+]](
|
||||
// CHECK: call void @__kmpc_end_serialized_parallel(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @{{.+}}fn5
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
fn5();
|
||||
}
|
||||
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd if (Arg)
|
||||
for(int i = 0 ; i < 100; i++) {
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_2]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_2]](
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, {{.+}}* [[OMP_OUTLINED_4:@.+]] to void
|
||||
// CHECK: call void @__kmpc_serialized_parallel(
|
||||
// CHECK: call void [[OMP_OUTLINED_4:@.+]](
|
||||
// CHECK: call void @__kmpc_end_serialized_parallel(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @{{.+}}fn6
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
fn6();
|
||||
}
|
||||
|
||||
return tmain(Arg);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define {{.+}} @{{.+}}tmain
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_1:@.+]] to void
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_1]]
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @{{.+}}fn1
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
|
||||
// CHECK: call void [[T_OUTLINE_FUN_2:@.+]](
|
||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_2]]
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @{{.+}}fn2
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 2, void {{.+}}* [[T_OUTLINE_FUN_3:@.+]] to void
|
||||
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(
|
||||
// call void [[T_OUTLINE_FUN_3:@.+]](
|
||||
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(
|
||||
|
||||
// CHECK: define internal {{.*}}void [[T_OUTLINE_FUN_3]]
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call {{.*}}void @{{.+}}fn3
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,593 @@
|
|||
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
template <class T>
|
||||
struct S {
|
||||
T f;
|
||||
S(T a) : f(a) {}
|
||||
S() : f() {}
|
||||
operator T() { return T(); }
|
||||
~S() {}
|
||||
};
|
||||
|
||||
// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
|
||||
// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
|
||||
template <typename T>
|
||||
T tmain() {
|
||||
S<T> test;
|
||||
T t_var = T();
|
||||
T vec[] = {1, 2};
|
||||
S<T> s_arr[] = {1, 2};
|
||||
S<T> &var = test;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
vec[i] = t_var;
|
||||
s_arr[i] = var;
|
||||
}
|
||||
return T();
|
||||
}
|
||||
|
||||
int main() {
|
||||
static int svar;
|
||||
volatile double g;
|
||||
volatile double &g1 = g;
|
||||
|
||||
#ifdef LAMBDA
|
||||
// LAMBDA-LABEL: @main
|
||||
// LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
|
||||
[&]() {
|
||||
static float sfvar;
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
|
||||
// LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// LAMBDA: call void [[OFFLOADING_FUN:@.+]](
|
||||
|
||||
// LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
|
||||
// LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd lastprivate(g, g1, svar, sfvar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
|
||||
// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
|
||||
// LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
|
||||
// LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
|
||||
// loop variables
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
|
||||
// LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
|
||||
// LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
|
||||
// LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
|
||||
// LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
|
||||
// LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
|
||||
// LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
|
||||
// LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
|
||||
|
||||
// init private variables
|
||||
// LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
|
||||
// LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
|
||||
// LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
|
||||
// LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
|
||||
// LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
|
||||
g = 1;
|
||||
g1 = 1;
|
||||
svar = 3;
|
||||
sfvar = 4.0;
|
||||
// LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
|
||||
// LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
|
||||
// LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
|
||||
// skip final branch
|
||||
// LAMBDA: br
|
||||
// LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
|
||||
// LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
|
||||
// LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
|
||||
|
||||
// LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
|
||||
// LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
|
||||
// LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
|
||||
// LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
|
||||
// LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
|
||||
// LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
|
||||
|
||||
// LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
|
||||
// LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
|
||||
// LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
|
||||
// LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
|
||||
// LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
|
||||
// LAMBDA: [[OMP_LASTPRIV_DONE]]:
|
||||
// LAMBDA: ret
|
||||
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void @[[LPAR_OUTL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, {{.+}}, {{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
|
||||
// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
|
||||
// LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
|
||||
// LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
|
||||
// loop variables
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: {{.+}} = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
|
||||
// LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
|
||||
// LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
|
||||
// LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
|
||||
// LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
|
||||
// LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
|
||||
// LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
|
||||
// LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
|
||||
|
||||
// init private variables
|
||||
// LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
|
||||
// LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
|
||||
// LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
|
||||
// LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
|
||||
// LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
|
||||
|
||||
// LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
|
||||
// LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
|
||||
// LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
|
||||
// LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
|
||||
// LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
|
||||
// LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
|
||||
// LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
|
||||
// LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
||||
// LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
|
||||
// LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
|
||||
// LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
||||
// LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
|
||||
// LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
|
||||
// LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
|
||||
// LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
|
||||
// LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
|
||||
|
||||
// skip 'final' simd branch and block (checked as part of simd)
|
||||
// LAMBDA: br
|
||||
|
||||
// LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
|
||||
// LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
|
||||
// LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
|
||||
|
||||
// LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
|
||||
// LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
|
||||
// LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
|
||||
// LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
|
||||
// LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
|
||||
// LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[G1_IN_REF]],
|
||||
|
||||
// LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
|
||||
// LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
|
||||
// LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
|
||||
// LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
|
||||
// LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
|
||||
// LAMBDA: [[OMP_LASTPRIV_DONE]]:
|
||||
// LAMBDA: ret
|
||||
|
||||
[&]() {
|
||||
// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
|
||||
// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
|
||||
g = 2;
|
||||
g1 = 2;
|
||||
svar = 4;
|
||||
sfvar = 8.0;
|
||||
// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
|
||||
// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
|
||||
// LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
|
||||
|
||||
// LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
||||
// LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
|
||||
// LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
|
||||
// LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
||||
// LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
|
||||
// LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
|
||||
// LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
|
||||
// LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
|
||||
}();
|
||||
}
|
||||
}();
|
||||
return 0;
|
||||
|
||||
// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#else
|
||||
S<float> test;
|
||||
int t_var = 0;
|
||||
int vec[] = {1, 2};
|
||||
S<float> s_arr[] = {1, 2};
|
||||
S<float> &var = test;
|
||||
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
vec[i] = t_var;
|
||||
s_arr[i] = var;
|
||||
}
|
||||
int i;
|
||||
|
||||
return tmain<int>();
|
||||
#endif
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} i{{[0-9]+}} @main()
|
||||
// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
|
||||
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
|
||||
// CHECK: ret
|
||||
//
|
||||
// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// skip loop variables
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
|
||||
// copy from parameters to local address variables
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
|
||||
// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
|
||||
|
||||
// load content of local address variables
|
||||
// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
|
||||
// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
|
||||
// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
|
||||
// the distribute loop
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// lastprivates
|
||||
// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
|
||||
// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
|
||||
// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
|
||||
|
||||
// CHECK: [[OMP_LASTPRIV_BLOCK]]:
|
||||
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
|
||||
// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
|
||||
// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
|
||||
// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
|
||||
// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
|
||||
// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
|
||||
// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
|
||||
// CHECK: [[S_ARR_COPY_BLOCK]]:
|
||||
// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
|
||||
// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
|
||||
// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
|
||||
// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
|
||||
// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
|
||||
// CHECK: [[S_ARR_COPY_DONE]]:
|
||||
// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
|
||||
// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
|
||||
// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
|
||||
// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
|
||||
// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
|
||||
// gbl and bound tid vars, prev lb and ub vars
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// skip loop variables
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
|
||||
// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
|
||||
// copy from parameters to local address variables
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
|
||||
// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
|
||||
|
||||
// load content of local address variables
|
||||
// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
|
||||
// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
|
||||
// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
|
||||
// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
|
||||
// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
|
||||
// CHECK: [[VAR_ADDR_REF:%.+]] = load {{.+}}, {{.+}} [[VAR_ADDR]],
|
||||
// the distribute loop
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
|
||||
// assignment: vec[i] = t_var;
|
||||
// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
|
||||
// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
|
||||
// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
|
||||
|
||||
// assignment: s_arr[i] = var;
|
||||
// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
|
||||
// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// skip final branch and block
|
||||
// CHECK: br
|
||||
|
||||
// lastprivates
|
||||
// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
|
||||
// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
|
||||
// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
|
||||
|
||||
// CHECK: [[OMP_LASTPRIV_BLOCK]]:
|
||||
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
|
||||
// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
|
||||
// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
|
||||
// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
|
||||
// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
|
||||
// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
|
||||
// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
|
||||
// CHECK: [[S_ARR_COPY_BLOCK]]:
|
||||
// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
|
||||
// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
|
||||
// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
|
||||
// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
|
||||
// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
|
||||
// CHECK: [[S_ARR_COPY_DONE]]:
|
||||
// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
|
||||
// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[VAR_ADDR_REF]] to i8*
|
||||
// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
|
||||
// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
|
||||
// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
|
||||
// CHECK: ret void
|
||||
|
||||
// template tmain
|
||||
// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
|
||||
// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define internal void [[OFFLOAD_FUN_1]](
|
||||
// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
|
||||
// skip alloca of global_tid and bound_tid
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// skip loop variables
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
|
||||
// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
|
||||
|
||||
// skip init of bound and global tid
|
||||
// CHECK: store i{{[0-9]+}}* {{.*}},
|
||||
// CHECK: store i{{[0-9]+}}* {{.*}},
|
||||
// copy from parameters to local address variables
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
|
||||
// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
|
||||
// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
|
||||
// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
|
||||
|
||||
// load content of local address variables
|
||||
// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
|
||||
// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
|
||||
// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
|
||||
// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
|
||||
// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
|
||||
// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// lastprivates
|
||||
// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
|
||||
// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
|
||||
// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
|
||||
|
||||
// CHECK: [[OMP_LASTPRIV_BLOCK]]:
|
||||
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
|
||||
// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
|
||||
// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
|
||||
// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
|
||||
// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
|
||||
// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
|
||||
// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
|
||||
// CHECK: [[S_ARR_COPY_BLOCK]]:
|
||||
// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
|
||||
// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
|
||||
// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
|
||||
// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
|
||||
// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
|
||||
// CHECK: [[S_ARR_COPY_DONE]]:
|
||||
// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
|
||||
// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
|
||||
// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void [[TPAR_OUTL:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, {{.+}}, {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
|
||||
// skip alloca of global_tid and bound_tid, and prev lb and ub vars
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
|
||||
// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
|
||||
// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// skip loop variables
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
|
||||
// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
|
||||
|
||||
// skip init of bound and global tid
|
||||
// CHECK: store i{{[0-9]+}}* {{.*}},
|
||||
// CHECK: store i{{[0-9]+}}* {{.*}},
|
||||
// copy from parameters to local address variables
|
||||
// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
|
||||
// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
|
||||
// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
|
||||
// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
|
||||
|
||||
// load content of local address variables
|
||||
// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
|
||||
// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
|
||||
// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
|
||||
// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
|
||||
// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
|
||||
// CHECK-DAG: store [[S_INT_TY]]* [[VAR_PRIV1]], [[S_INT_TY]]** [[TMP_PRIV1]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
|
||||
// assignment: vec[i] = t_var;
|
||||
// CHECK: [[IV_VAL1:%.+]] =
|
||||
// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
|
||||
// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
|
||||
// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
|
||||
|
||||
// assignment: s_arr[i] = var;
|
||||
// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
|
||||
// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
|
||||
// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
|
||||
// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
|
||||
// skip final branch and block
|
||||
// CHECK: br
|
||||
|
||||
// lastprivates
|
||||
// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
|
||||
// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
|
||||
// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
|
||||
|
||||
// CHECK: [[OMP_LASTPRIV_BLOCK]]:
|
||||
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
|
||||
// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
|
||||
// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
|
||||
// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
|
||||
// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
|
||||
// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
|
||||
// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
|
||||
// CHECK: [[S_ARR_COPY_BLOCK]]:
|
||||
// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
|
||||
// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
|
||||
// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
|
||||
// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
|
||||
// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
|
||||
// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
|
||||
// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
|
||||
// CHECK: [[S_ARR_COPY_DONE]]:
|
||||
// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
|
||||
// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[VAR_ADDR1_REF]] to i8*
|
||||
// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,123 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
typedef __INTPTR_TYPE__ intptr_t;
|
||||
|
||||
// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
|
||||
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
|
||||
// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
|
||||
void foo();
|
||||
|
||||
struct S {
|
||||
intptr_t a, b, c;
|
||||
S(intptr_t a) : a(a) {}
|
||||
operator char() { return a; }
|
||||
~S() {}
|
||||
};
|
||||
|
||||
template <typename T, int C>
|
||||
int tmain() {
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd num_threads(C)
|
||||
for (int i = 0; i < 100; i++)
|
||||
foo();
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd num_threads(T(23))
|
||||
for (int i = 0; i < 100; i++)
|
||||
foo();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main() {
|
||||
S s(0);
|
||||
char a = s;
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
|
||||
// CHECK: invoke{{.+}} [[TMAIN_5:@.+]]()
|
||||
// CHECK: invoke{{.+}} [[TMAIN_1:@.+]]()
|
||||
#pragma omp target
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_0]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
|
||||
#pragma omp teams distribute parallel for simd num_threads(2)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_0]](
|
||||
// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 2)
|
||||
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
|
||||
foo();
|
||||
}
|
||||
#pragma omp target
|
||||
// CHECK: define internal void [[OFFLOADING_FUN_1]](
|
||||
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}}* [[OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
|
||||
#pragma omp teams distribute parallel for simd num_threads(a)
|
||||
for (int i = 0; i < 100; i++) {
|
||||
// CHECK: define{{.+}} void [[OMP_TEAMS_OUTLINED_1]](
|
||||
// CHECK-DAG: [[A_ADDR:%.+]] = alloca i8*,
|
||||
// CHECK-DAG: [[A_REF:%.+]] = load i8*, i8** [[A_ADDR]],
|
||||
// CHECK-DAG: [[A_VAL:%.+]] = load i8, i8* [[A_REF]],
|
||||
// CHECK-DAG: [[A_EXT:%.+]] = sext i8 [[A_VAL]] to {{.+}}
|
||||
// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[A_EXT]])
|
||||
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
|
||||
foo();
|
||||
}
|
||||
return a + tmain<char, 5>() + tmain<S, 1>();
|
||||
}
|
||||
|
||||
// tmain 5
|
||||
// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_5]]()
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[T_OFFLOADING_FUN_0:@.+]](
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[T_OFFLOADING_FUN_1:@.+]](
|
||||
|
||||
// tmain 1
|
||||
// CHECK-DAG: define {{.*}}i{{[0-9]+}} [[TMAIN_1]]()
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[T_OFFLOADING_FUN_2:@.+]](
|
||||
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
|
||||
// CHECK: call void [[T_OFFLOADING_FUN_3:@.+]](
|
||||
|
||||
// CHECK: define internal void [[T_OFFLOADING_FUN_0]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_0]](
|
||||
// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 5)
|
||||
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
|
||||
|
||||
// CHECK: define internal void [[T_OFFLOADING_FUN_1]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_1:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_1]](
|
||||
// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23)
|
||||
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
|
||||
|
||||
// CHECK: define internal void [[T_OFFLOADING_FUN_2]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]](
|
||||
// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1)
|
||||
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
|
||||
|
||||
// CHECK: define internal void [[T_OFFLOADING_FUN_3]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]])
|
||||
// CHECK: [[NUM_TH_CPT:%.+]] = alloca i8*,
|
||||
// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]],
|
||||
// CHECK: [[NUM_TH_REF:%.+]] = load{{.+}}, {{.+}} [[NUM_TH_CPT]],
|
||||
// CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]],
|
||||
// CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}}
|
||||
// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]])
|
||||
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,333 @@
|
|||
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
|
||||
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
struct St {
|
||||
int a, b;
|
||||
St() : a(0), b(0) {}
|
||||
St(const St &st) : a(st.a + st.b), b(0) {}
|
||||
~St() {}
|
||||
};
|
||||
|
||||
volatile int g = 1212;
|
||||
volatile int &g1 = g;
|
||||
|
||||
template <class T>
|
||||
struct S {
|
||||
T f;
|
||||
S(T a) : f(a + g) {}
|
||||
S() : f(g) {}
|
||||
S(const S &s, St t = St()) : f(s.f + t.a) {}
|
||||
operator T() { return T(); }
|
||||
~S() {}
|
||||
};
|
||||
|
||||
// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
|
||||
// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
|
||||
|
||||
template <typename T>
|
||||
T tmain() {
|
||||
S<T> test;
|
||||
T t_var = T();
|
||||
T vec[] = {1, 2};
|
||||
S<T> s_arr[] = {1, 2};
|
||||
S<T> &var = test;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd private(t_var, vec, s_arr, var)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
vec[i] = t_var;
|
||||
s_arr[i] = var;
|
||||
}
|
||||
return T();
|
||||
}
|
||||
|
||||
// CHECK-DAG: [[TEST:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
|
||||
S<float> test;
|
||||
// CHECK-DAG: [[T_VAR:@.+]] = global i{{[0-9]+}} 333,
|
||||
int t_var = 333;
|
||||
// CHECK-DAG: [[VEC:@.+]] = global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
|
||||
int vec[] = {1, 2};
|
||||
// CHECK-DAG: [[S_ARR:@.+]] = global [2 x [[S_FLOAT_TY]]] zeroinitializer,
|
||||
S<float> s_arr[] = {1, 2};
|
||||
// CHECK-DAG: [[VAR:@.+]] = global [[S_FLOAT_TY]] zeroinitializer,
|
||||
S<float> var(3);
|
||||
// CHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
|
||||
|
||||
int main() {
|
||||
static int sivar;
|
||||
#ifdef LAMBDA
|
||||
// LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
|
||||
// LAMBDA-LABEL: @main
|
||||
// LAMBDA: call void [[OUTER_LAMBDA:@.+]](
|
||||
[&]() {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
|
||||
// LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// LAMBDA: call void @[[LOFFL1:.+]](
|
||||
// LAMBDA: ret
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd private(g, g1, sivar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} {{%.+}}, i{{64|32}} {{%.+}})
|
||||
// LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[LOUTL1:.+]] to {{.+}})
|
||||
// LAMBDA: ret void
|
||||
|
||||
// LAMBDA: define internal void @[[LOUTL1]]({{.+}})
|
||||
// Skip global, bound tid and loop vars
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
|
||||
// LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
|
||||
|
||||
g = 1;
|
||||
g1 = 1;
|
||||
sivar = 2;
|
||||
// LAMBDA: call void @__kmpc_for_static_init_4(
|
||||
// LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
|
||||
// LAMBDA: call void @__kmpc_for_static_fini(
|
||||
// LAMBDA: ret void
|
||||
|
||||
// LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}})
|
||||
// Skip global, bound tid and loop vars
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: alloca i{{[0-9]+}},
|
||||
// LAMBDA: alloca i{{[0-9]+}},
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: [[G_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[G1_PRIV:%.+]] = alloca i{{[0-9]+}}
|
||||
// LAMBDA: [[TMP:%.+]] = alloca i{{[0-9]+}}*,
|
||||
// LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// LAMBDA: store{{.+}} [[G1_PRIV]], {{.+}} [[TMP]],
|
||||
// LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G_PRIV]],
|
||||
// LAMBDA-DAG: store{{.+}} 2, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA-DAG: [[G1_REF:%.+]] = load{{.+}}, {{.+}} [[TMP]],
|
||||
// LAMBDA-DAG: store{{.+}} 1, {{.+}} [[G1_REF]],
|
||||
// LAMBDA: call void [[INNER_LAMBDA:@.+]](
|
||||
// LAMBDA: call void @__kmpc_for_static_fini(
|
||||
// LAMBDA: ret void
|
||||
[&]() {
|
||||
// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
|
||||
// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
|
||||
g = 2;
|
||||
g1 = 2;
|
||||
sivar = 4;
|
||||
// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
|
||||
|
||||
// LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
|
||||
// LAMBDA: [[G1_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
|
||||
// LAMBDA: [[G1_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[G1_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[G1_REF]]
|
||||
// LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
|
||||
// LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
|
||||
// LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SIVAR_REF]]
|
||||
}();
|
||||
}
|
||||
}();
|
||||
return 0;
|
||||
|
||||
// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#else
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd private(t_var, vec, s_arr, var, sivar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
vec[i] = t_var;
|
||||
s_arr[i] = var;
|
||||
sivar += i;
|
||||
}
|
||||
return tmain<int>();
|
||||
#endif
|
||||
}
|
||||
|
||||
// CHECK: define {{.*}}i{{[0-9]+}} @main()
|
||||
// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
|
||||
// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define{{.*}} void @[[OFFL1]]({{.+}})
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[OUTL1:.+]] to {{.+}})
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[OUTL1]]({{.+}})
|
||||
// Skip global, bound tid and loop vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
|
||||
// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i32,
|
||||
|
||||
// private(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
|
||||
// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
|
||||
|
||||
// private(var)
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// Skip global, bound tid and loop vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i{{[0-9]+}},
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK: {{.+}} = alloca i32,
|
||||
// CHECK-DAG: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK-DAG: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK-DAG: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
|
||||
// CHECK-DAG: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
|
||||
// CHECK-DAG: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i32,
|
||||
|
||||
// private(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
|
||||
// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
|
||||
|
||||
// private(var)
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[VAR_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[SIVAR_PRIV]]
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
|
||||
// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0,
|
||||
// CHECK: call void @[[TOFFL1:.+]]()
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define {{.*}}void @[[TOFFL1]]()
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}} @[[TOUTL1:.+]] to {{.+}})
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[TOUTL1]]({{.+}})
|
||||
// Skip global, bound tid and loop vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// CHECK: alloca i32,
|
||||
|
||||
// private(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
|
||||
// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
|
||||
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
|
||||
// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
|
||||
|
||||
// private(var)
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
|
||||
// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL1:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[TPAR_OUTL1]]({{.+}})
|
||||
// Skip global, bound tid and loop vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// prev lb and ub
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// iter variables
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
|
||||
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
|
||||
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
|
||||
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
|
||||
// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
|
||||
// CHECK: alloca i32,
|
||||
|
||||
// private(s_arr)
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
|
||||
// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
|
||||
|
||||
// CHECK-DAG: [[S_ARR_PRIV_BGN:%.+]] = getelementptr{{.*}} [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]],
|
||||
// CHECK-DAG: [[S_ARR_PTR_ALLOC:%.+]] = phi{{.+}} [ [[S_ARR_PRIV_BGN]], {{.+}} ], [ [[S_ARR_NEXT:%.+]], {{.+}} ]
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[S_ARR_PTR_ALLOC]])
|
||||
// CHECK-DAG: [[S_ARR_NEXT]] = getelementptr {{.+}} [[S_ARR_PTR_ALLOC]],
|
||||
|
||||
// private(var)
|
||||
// CHECK-DAG: call void @{{.+}}({{.+}} [[VAR_PRIV]])
|
||||
// CHECK-DAG: store{{.+}} [[VAR_PRIV]], {{.+}} [[TMP]]
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[T_VAR_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[VEC_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[S_ARR_PRIV]]
|
||||
// CHECK-DAG: {{.+}} = {{.+}} [[TMP]]
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,93 @@
|
|||
// add -fopenmp-targets
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
typedef __INTPTR_TYPE__ intptr_t;
|
||||
|
||||
// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
|
||||
// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
|
||||
void foo();
|
||||
|
||||
struct S {
|
||||
intptr_t a, b, c;
|
||||
S(intptr_t a) : a(a) {}
|
||||
operator char() { return a; }
|
||||
~S() {}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T tmain() {
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd proc_bind(master)
|
||||
for(int i = 0; i < 1000; i++) {}
|
||||
return T();
|
||||
}
|
||||
|
||||
int main() {
|
||||
// CHECK-LABEL: @main
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd proc_bind(spread)
|
||||
for(int i = 0; i < 1000; i++) {}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd proc_bind(close)
|
||||
for(int i = 0; i < 1000; i++) {}
|
||||
return tmain<int>();
|
||||
}
|
||||
|
||||
// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
|
||||
// CHECK: call void [[OFFL1:@.+]]()
|
||||
// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
|
||||
// CHECK: call void [[OFFL2:@.+]]()
|
||||
// CHECK: [[CALL_RET:%.+]] = call{{.+}} i32 [[TMAIN:@.+]]()
|
||||
// CHECK: ret i32 [[CALL_RET]]
|
||||
|
||||
// CHECK: define{{.+}} void [[OFFL1]](
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
|
||||
// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
|
||||
// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
|
||||
// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 4)
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define{{.+}} [[OFFL2]]()
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} [[OMP_OUTLINED_1]](i32* {{.+}} [[GTID_IN:%.+]],
|
||||
// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
|
||||
// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
|
||||
// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 3)
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define{{.+}} [[TMAIN]]()
|
||||
// CHECK: call {{.*}}@__tgt_target_teams({{.+}})
|
||||
// CHECK: call void [[OFFL3:@.+]]()
|
||||
|
||||
// CHECK: define{{.+}} [[OFFL3]]()
|
||||
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, {{.+}}, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
|
||||
|
||||
// CHECK: define{{.+}} [[OMP_OUTLINED_3]](i32* {{.+}} [[GTID_IN:%.+]],
|
||||
// CHECK: [[GTID_ADDR:%.+]] = alloca i32*,
|
||||
// CHECK: store i32* [[GTID_IN]], i32** [[GTID_ADDR]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_ADDR]],
|
||||
// CHECK: [[GTID_VAL:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call {{.*}}void @__kmpc_push_proc_bind([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 [[GTID_VAL]], i32 2)
|
||||
// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,348 @@
|
|||
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
|
||||
// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
|
||||
|
||||
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
|
||||
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
template <typename T>
|
||||
T tmain() {
|
||||
T t_var = T();
|
||||
T vec[] = {1, 2};
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd reduction(+: t_var)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
t_var += (T) i;
|
||||
}
|
||||
return T();
|
||||
}
|
||||
|
||||
int main() {
|
||||
static int sivar;
|
||||
#ifdef LAMBDA
|
||||
// LAMBDA: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
|
||||
|
||||
// LAMBDA-LABEL: @main
|
||||
// LAMBDA: call void [[OUTER_LAMBDA:@.+]](
|
||||
[&]() {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
|
||||
// LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// LAMBDA: call void @[[LOFFL1:.+]](
|
||||
// LAMBDA: ret
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd reduction(+: sivar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
// LAMBDA: define{{.*}} internal{{.*}} void @[[LOFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
|
||||
// LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
|
||||
// LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
|
||||
// LAMBDA: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
|
||||
// LAMBDA: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[LOUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
|
||||
// LAMBDA: ret void
|
||||
|
||||
// LAMBDA: define internal void @[[LOUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
|
||||
// Skip global and bound tid vars
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
|
||||
// LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
|
||||
// LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
|
||||
// LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
|
||||
// LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
|
||||
// LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
|
||||
|
||||
// LAMBDA: call void @__kmpc_for_static_init_4(
|
||||
// LAMBDA: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[LPAR_OUTL:.+]] to
|
||||
// LAMBDA: call void @__kmpc_for_static_fini(
|
||||
// LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
|
||||
// LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
|
||||
// LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
|
||||
// LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
|
||||
// LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
|
||||
// LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
|
||||
// LAMBDA: {{.+}}, label %[[CASE1:.+]]
|
||||
// LAMBDA: {{.+}}, label %[[CASE2:.+]]
|
||||
// LAMBDA: ]
|
||||
// LAMBDA: [[CASE1]]:
|
||||
// LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
|
||||
// LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
|
||||
// LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
|
||||
// LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
|
||||
// LAMBDA: br
|
||||
// LAMBDA: [[CASE2]]:
|
||||
// LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
|
||||
// LAMBDA: br
|
||||
|
||||
// LAMBDA: define internal void @[[LPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
|
||||
|
||||
// Skip global and bound tid vars, and prev lb and ub vars
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: {{.+}} = alloca i32*,
|
||||
// LAMBDA: alloca i{{[0-9]+}},
|
||||
// LAMBDA: alloca i{{[0-9]+}},
|
||||
// LAMBDA: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
|
||||
// skip loop vars
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: alloca i32,
|
||||
// LAMBDA: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
|
||||
// LAMBDA: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
|
||||
// LAMBDA: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
|
||||
// LAMBDA: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
|
||||
// LAMBDA: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
|
||||
|
||||
// LAMBDA: call void @__kmpc_for_static_init_4(
|
||||
// LAMBDA: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA: call void [[INNER_LAMBDA:@.+]](
|
||||
// LAMBDA: call void @__kmpc_for_static_fini(
|
||||
// LAMBDA: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
|
||||
// LAMBDA: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
|
||||
// LAMBDA: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
|
||||
// LAMBDA: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
|
||||
// LAMBDA: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
|
||||
// LAMBDA: switch{{.+}} [[K_RED_RET]], label{{.+}} [
|
||||
// LAMBDA: {{.+}}, label %[[CASE1:.+]]
|
||||
// LAMBDA: {{.+}}, label %[[CASE2:.+]]
|
||||
// LAMBDA: ]
|
||||
// LAMBDA: [[CASE1]]:
|
||||
// LAMBDA-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
|
||||
// LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
|
||||
// LAMBDA: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
|
||||
// LAMBDA: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
|
||||
// LAMBDA: br
|
||||
// LAMBDA: [[CASE2]]:
|
||||
// LAMBDA-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// LAMBDA-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
|
||||
// LAMBDA: br
|
||||
|
||||
sivar += i;
|
||||
|
||||
[&]() {
|
||||
// LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
|
||||
// LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
|
||||
|
||||
sivar += 4;
|
||||
// LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
|
||||
|
||||
// LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
|
||||
// LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
|
||||
// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_REF]]
|
||||
// LAMBDA: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], 4
|
||||
// LAMBDA: store i{{[0-9]+}} [[SIVAR_INC]], i{{[0-9]+}}* [[SIVAR_REF]]
|
||||
}();
|
||||
}
|
||||
}();
|
||||
return 0;
|
||||
|
||||
// LAMBDA: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#else
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd reduction(+: sivar)
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
sivar += i;
|
||||
}
|
||||
return tmain<int>();
|
||||
#endif
|
||||
}
|
||||
|
||||
// CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer
|
||||
|
||||
// CHECK: define {{.*}}i{{[0-9]+}} @main()
|
||||
// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0)
|
||||
// CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}})
|
||||
// CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]()
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define{{.*}} void @[[OFFL1]](i{{64|32}} [[SIVAR_ARG:%.+]])
|
||||
// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}},
|
||||
// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
|
||||
// CHECK-64: [[SIVAR_CONV:%.+]] = bitcast{{.+}} [[SIVAR_ADDR]] to
|
||||
// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_CONV]])
|
||||
// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}}, {{.+}} [[SIVAR_ADDR]])
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[OUTL1]]({{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
|
||||
// Skip global and bound tid vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
|
||||
// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
|
||||
// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
|
||||
// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
|
||||
// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
|
||||
// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
|
||||
// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
|
||||
// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
|
||||
// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
|
||||
// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
|
||||
// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
|
||||
// CHECK: {{.+}}, label %[[CASE1:.+]]
|
||||
// CHECK: {{.+}}, label %[[CASE2:.+]]
|
||||
// CHECK: ]
|
||||
// CHECK: [[CASE1]]:
|
||||
// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
|
||||
// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
|
||||
// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
|
||||
// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
|
||||
// CHECK: br
|
||||
// CHECK: [[CASE2]]:
|
||||
// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
|
||||
// CHECK: br
|
||||
|
||||
// CHECK: define internal void @[[PAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[SIVAR_ARG:%.+]])
|
||||
// Skip global and bound tid vars, and prev lb and ub
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[SIVAR_ADDR:%.+]] = alloca i{{.+}}*,
|
||||
// skip loop vars
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{.+}},
|
||||
// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
|
||||
// CHECK: store{{.+}} [[SIVAR_ARG]], {{.+}} [[SIVAR_ADDR]],
|
||||
// CHECK: [[SIVAR_REF:%.+]] = load{{.+}}, {{.+}} [[SIVAR_ADDR]]
|
||||
// CHECK: store{{.+}} 0, {{.+}} [[SIVAR_PRIV]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: store{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
|
||||
// CHECK: [[SIVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[SIVAR_PRIV]] to
|
||||
// CHECK: store{{.+}} [[SIVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
|
||||
// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
|
||||
// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
|
||||
// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
|
||||
// CHECK: {{.+}}, label %[[CASE1:.+]]
|
||||
// CHECK: {{.+}}, label %[[CASE2:.+]]
|
||||
// CHECK: ]
|
||||
// CHECK: [[CASE1]]:
|
||||
// CHECK-DAG: [[SIVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_REF]],
|
||||
// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// CHECK-DAG: [[SIVAR_INC:%.+]] = add{{.+}} [[SIVAR_VAL]], [[SIVAR_PRIV_VAL]]
|
||||
// CHECK: store{{.+}} [[SIVAR_INC]], {{.+}} [[SIVAR_REF]],
|
||||
// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
|
||||
// CHECK: br
|
||||
// CHECK: [[CASE2]]:
|
||||
// CHECK-DAG: [[SIVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[SIVAR_PRIV]],
|
||||
// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[SIVAR_REF]], {{.+}} [[SIVAR_PRIV_VAL]]
|
||||
// CHECK: br
|
||||
|
||||
// CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]()
|
||||
// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1,
|
||||
// CHECK: call void @[[TOFFL1:.+]]({{.+}})
|
||||
// CHECK: ret
|
||||
|
||||
// CHECK: define{{.*}} void @[[TOFFL1]](i{{64|32}} [[TVAR_ARG:%.+]])
|
||||
// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}},
|
||||
// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
|
||||
// CHECK-64: [[TVAR_CONV:%.+]] = bitcast{{.+}} [[TVAR_ADDR]] to
|
||||
// CHECK-64: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_CONV]])
|
||||
// CHECK-32: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[TOUTL1:.+]] to {{.+}}, {{.+}} [[TVAR_ADDR]])
|
||||
// CHECK: ret void
|
||||
|
||||
// CHECK: define internal void @[[TOUTL1]]({{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
|
||||
// Skip global and bound tid vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
|
||||
// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
|
||||
// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
|
||||
// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
|
||||
// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
|
||||
// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[TPAR_OUTL:.+]] to
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
|
||||
// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
|
||||
// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
|
||||
// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
|
||||
// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
|
||||
// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
|
||||
// CHECK: {{.+}}, label %[[CASE1:.+]]
|
||||
// CHECK: {{.+}}, label %[[CASE2:.+]]
|
||||
// CHECK: ]
|
||||
// CHECK: [[CASE1]]:
|
||||
// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
|
||||
// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
|
||||
// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
|
||||
// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
|
||||
// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
|
||||
// CHECK: br
|
||||
// CHECK: [[CASE2]]:
|
||||
// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
|
||||
// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
|
||||
// CHECK: br
|
||||
|
||||
// CHECK: define internal void @[[TPAR_OUTL]]({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[TVAR_ARG:%.+]])
|
||||
// Skip global and bound tid vars, and prev lb and ub vars
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: {{.+}} = alloca i32*,
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: alloca i{{[0-9]+}},
|
||||
// CHECK: [[TVAR_ADDR:%.+]] = alloca i{{.+}}*,
|
||||
// skip loop vars
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: alloca i32,
|
||||
// CHECK: [[TVAR_PRIV:%.+]] = alloca i{{.+}},
|
||||
// CHECK: [[RED_LIST:%.+]] = alloca [1 x {{.+}}],
|
||||
// CHECK: store{{.+}} [[TVAR_ARG]], {{.+}} [[TVAR_ADDR]],
|
||||
// CHECK: [[TVAR_REF:%.+]] = load{{.+}}, {{.+}} [[TVAR_ADDR]]
|
||||
// CHECK: store{{.+}} 0, {{.+}} [[TVAR_PRIV]],
|
||||
|
||||
// CHECK: call void @__kmpc_for_static_init_4(
|
||||
// CHECK: store{{.+}}, {{.+}} [[TVAR_PRIV]],
|
||||
// CHECK: call void @__kmpc_for_static_fini(
|
||||
// CHECK: [[RED_LIST_GEP:%.+]] = getelementptr{{.+}} [[RED_LIST]],
|
||||
// CHECK: [[TVAR_PRIV_CAST:%.+]] = bitcast{{.+}} [[TVAR_PRIV]] to
|
||||
// CHECK: store{{.+}} [[TVAR_PRIV_CAST]], {{.+}} [[RED_LIST_GEP]],
|
||||
// CHECK: [[RED_LIST_BCAST:%.+]] = bitcast{{.+}} [[RED_LIST]] to
|
||||
// CHECK: [[K_RED_RET:%.+]] = call{{.+}} @__kmpc_reduce_nowait({{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}} [[RED_LIST_BCAST]], {{.+}} [[RED_FUN:@.+]], {{.+}} [[RED_VAR]])
|
||||
// CHECK: switch{{.+}} [[K_RED_RET]], label{{.+}} [
|
||||
// CHECK: {{.+}}, label %[[CASE1:.+]]
|
||||
// CHECK: {{.+}}, label %[[CASE2:.+]]
|
||||
// CHECK: ]
|
||||
// CHECK: [[CASE1]]:
|
||||
// CHECK-DAG: [[TVAR_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_REF]],
|
||||
// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
|
||||
// CHECK-DAG: [[TVAR_INC:%.+]] = add{{.+}} [[TVAR_VAL]], [[TVAR_PRIV_VAL]]
|
||||
// CHECK: store{{.+}} [[TVAR_INC]], {{.+}} [[TVAR_REF]],
|
||||
// CHECK: call void @__kmpc_end_reduce_nowait({{.+}}, {{.+}}, {{.+}} [[RED_VAR]])
|
||||
// CHECK: br
|
||||
// CHECK: [[CASE2]]:
|
||||
// CHECK-DAG: [[TVAR_PRIV_VAL:%.+]] = load{{.+}}, {{.+}} [[TVAR_PRIV]],
|
||||
// CHECK-DAG: [[ATOMIC_RES:%.+]] = atomicrmw add{{.+}} [[TVAR_REF]], {{.+}} [[TVAR_PRIV_VAL]]
|
||||
// CHECK: br
|
||||
|
||||
// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,402 @@
|
|||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
|
||||
// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
|
||||
#ifdef CK1
|
||||
|
||||
template <typename T, int X, long long Y>
|
||||
struct SS{
|
||||
T a[X];
|
||||
float b;
|
||||
// CK1: define {{.*}}i32 @{{.+}}foo{{.+}}(
|
||||
int foo(void) {
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL1:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL2:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(static)
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL3:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(static, X/2)
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL4:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(dynamic)
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
|
||||
// CK1: call i32 @__tgt_target_teams(
|
||||
// CK1: call void @[[OFFL5:.+]](
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(dynamic, X/2)
|
||||
for(int i = 0; i < X; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
|
||||
// CK1: define internal void @[[OFFL1]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL1:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OFFL2]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL2:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL2]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL2]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OFFL3]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL3:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL3]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL3]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OFFL4]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL4:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL4]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL4]]({{.+}})
|
||||
// CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
|
||||
// CK1: call {{.+}} @__kmpc_dispatch_next_4(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OFFL5]](
|
||||
// CK1: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTL5:.+]] to {{.+}},
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[OUTL5]]({{.+}})
|
||||
// CK1: call void @__kmpc_for_static_init_4(
|
||||
// CK1: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
|
||||
// CK1: call void @__kmpc_for_static_fini(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: define internal void @[[PAR_OUTL5]]({{.+}})
|
||||
// CK1: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
|
||||
// CK1: call {{.+}} @__kmpc_dispatch_next_4(
|
||||
// CK1: ret void
|
||||
|
||||
// CK1: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
return a[0];
|
||||
}
|
||||
};
|
||||
|
||||
int teams_template_struct(void) {
|
||||
SS<int, 123, 456> V;
|
||||
return V.foo();
|
||||
|
||||
}
|
||||
#endif // CK1
|
||||
|
||||
// Test host codegen.
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
|
||||
// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
|
||||
#ifdef CK2
|
||||
|
||||
template <typename T, int n>
|
||||
int tmain(T argc) {
|
||||
T a[n];
|
||||
int m = 10;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(static)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(static, m)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(dynamic)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(dynamic, m)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = (T)0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
int n = 100;
|
||||
int a[n];
|
||||
int m = 10;
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd dist_schedule(static, m)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(dynamic)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
#pragma omp target
|
||||
#pragma omp teams distribute parallel for simd schedule(dynamic, m)
|
||||
for(int i = 0; i < n; i++) {
|
||||
a[i] = 0;
|
||||
}
|
||||
return tmain<int, 10>(argc);
|
||||
}
|
||||
|
||||
// CK2: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL1:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL2:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL3:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL4:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFL5:.+]]({{.+}})
|
||||
// CK2: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}})
|
||||
// CK2: ret
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL1]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL1:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL1:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL2]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 3, {{.+}} @[[OUTL2:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL2:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL3]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}} @[[OUTL3:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL3:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL4]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL4:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL4]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL4:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL4]]({{.+}})
|
||||
// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
|
||||
// CK2: call {{.+}} @__kmpc_dispatch_next_4(
|
||||
// CK2: ret void
|
||||
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFL5]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTL5:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTL5]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTL5:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTL5]]({{.+}})
|
||||
// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
|
||||
// CK2: call {{.+}} @__kmpc_dispatch_next_4(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}i32 @[[TMAIN]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT1:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT2:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT3:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT4:.+]]({{.+}})
|
||||
// CK2: call i32 @__tgt_target_teams(
|
||||
// CK2: call void @[[OFFLT5:.+]]({{.+}})
|
||||
// CK2: ret
|
||||
// CK2-NEXT: }
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT1]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT1:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT1:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT1]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT2]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 1, {{.+}} @[[OUTLT2:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT2:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT2]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 34,
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT3]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT3:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT3:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT3]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 33,
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT4]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT4:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT4]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT4:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT4]]({{.+}})
|
||||
// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
|
||||
// CK2: call {{.+}} @__kmpc_dispatch_next_4(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define {{.*}}void @[[OFFLT5]]({{.+}})
|
||||
// CK2: call void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}} @[[OUTLT5:.+]] to {{.+}},
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[OUTLT5]]({{.+}})
|
||||
// CK2: call void @__kmpc_for_static_init_4(
|
||||
// CK2: call void {{.*}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}} @[[PAR_OUTLT5:.+]] to
|
||||
// CK2: call void @__kmpc_for_static_fini(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: define internal void @[[PAR_OUTLT5]]({{.+}})
|
||||
// CK2: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, i32 35,
|
||||
// CK2: call {{.+}} @__kmpc_dispatch_next_4(
|
||||
// CK2: ret void
|
||||
|
||||
// CK2: !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
|
||||
#endif // CK2
|
||||
#endif // #ifndef HEADER
|
Loading…
Reference in New Issue