forked from OSchip/llvm-project
[OPENMP50]Codegen for scan directive in simd loops.
Added codegen for scandirectives in simd loop. The codegen transforms original code: ``` int x = 0; #pragma omp simd reduction(inscan, +: x) for (..) { <first part> #pragma omp scan inclusive(x) <second part> } ``` into ``` int x = 0; for (..) { int x_priv = 0; <first part> x = x_priv + x; x_priv = x; <second part> } ``` and ``` int x = 0; #pragma omp simd reduction(inscan, +: x) for (..) { <first part> #pragma omp scan exclusive(x) <second part> } ``` into ``` int x = 0; for (..) { int x_priv = 0; <second part> int temp = x; x = x_priv + x; x_priv = temp; <first part> } ``` Differential revision: https://reviews.llvm.org/D78232
This commit is contained in:
parent
948b206fc2
commit
fb80e67f10
|
@ -2075,6 +2075,15 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
|
|||
if (const auto *C = D.getSingleClause<OMPOrderClause>())
|
||||
if (C->getKind() == OMPC_ORDER_concurrent)
|
||||
LoopStack.setParallel(/*Enable=*/true);
|
||||
if ((D.getDirectiveKind() == OMPD_simd ||
|
||||
(getLangOpts().OpenMPSimd &&
|
||||
isOpenMPSimdDirective(D.getDirectiveKind()))) &&
|
||||
llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
|
||||
[](const OMPReductionClause *C) {
|
||||
return C->getModifier() == OMPC_REDUCTION_inscan;
|
||||
}))
|
||||
// Disable parallel access in case of prefix sum.
|
||||
LoopStack.setParallel(/*Enable=*/false);
|
||||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPSimdFinal(
|
||||
|
@ -2270,6 +2279,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
|
|||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
|
||||
ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
|
||||
OMPFirstScanLoop = true;
|
||||
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
|
||||
emitOMPSimdRegion(CGF, S, Action);
|
||||
};
|
||||
|
@ -4191,14 +4202,15 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
|
|||
}
|
||||
|
||||
void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
|
||||
// Do not emit code for non-simd directives in simd-only mode.
|
||||
if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
|
||||
if (!OMPParentLoopDirectiveForScan)
|
||||
return;
|
||||
const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
|
||||
bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
|
||||
SmallVector<const Expr *, 4> Shareds;
|
||||
SmallVector<const Expr *, 4> Privates;
|
||||
SmallVector<const Expr *, 4> LHSs;
|
||||
SmallVector<const Expr *, 4> RHSs;
|
||||
SmallVector<const Expr *, 4> ReductionOps;
|
||||
SmallVector<const Expr *, 4> CopyOps;
|
||||
SmallVector<const Expr *, 4> CopyArrayTemps;
|
||||
SmallVector<const Expr *, 4> CopyArrayElems;
|
||||
|
@ -4209,13 +4221,109 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
|
|||
Privates.append(C->privates().begin(), C->privates().end());
|
||||
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
|
||||
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
|
||||
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
|
||||
CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
|
||||
CopyArrayTemps.append(C->copy_array_temps().begin(),
|
||||
C->copy_array_temps().end());
|
||||
CopyArrayElems.append(C->copy_array_elems().begin(),
|
||||
C->copy_array_elems().end());
|
||||
}
|
||||
bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
|
||||
if (ParentDir.getDirectiveKind() == OMPD_simd ||
|
||||
(getLangOpts().OpenMPSimd &&
|
||||
isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
|
||||
// For simd directive and simd-based directives in simd only mode, use the
|
||||
// following codegen:
|
||||
// int x = 0;
|
||||
// #pragma omp simd reduction(inscan, +: x)
|
||||
// for (..) {
|
||||
// <first part>
|
||||
// #pragma omp scan inclusive(x)
|
||||
// <second part>
|
||||
// }
|
||||
// is transformed to:
|
||||
// int x = 0;
|
||||
// for (..) {
|
||||
// int x_priv = 0;
|
||||
// <first part>
|
||||
// x = x_priv + x;
|
||||
// x_priv = x;
|
||||
// <second part>
|
||||
// }
|
||||
// and
|
||||
// int x = 0;
|
||||
// #pragma omp simd reduction(inscan, +: x)
|
||||
// for (..) {
|
||||
// <first part>
|
||||
// #pragma omp scan exclusive(x)
|
||||
// <second part>
|
||||
// }
|
||||
// to
|
||||
// int x = 0;
|
||||
// for (..) {
|
||||
// int x_priv = 0;
|
||||
// <second part>
|
||||
// int temp = x;
|
||||
// x = x_priv + x;
|
||||
// x_priv = temp;
|
||||
// <first part>
|
||||
// }
|
||||
llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
|
||||
EmitBranch(IsInclusive
|
||||
? OMPScanReduce
|
||||
: BreakContinueStack.back().ContinueBlock.getBlock());
|
||||
EmitBlock(OMPScanDispatch);
|
||||
{
|
||||
// New scope for correct construction/destruction of temp variables for
|
||||
// exclusive scan.
|
||||
LexicalScope Scope(*this, S.getSourceRange());
|
||||
EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
|
||||
EmitBlock(OMPScanReduce);
|
||||
if (!IsInclusive) {
|
||||
// Create temp var and copy LHS value to this temp value.
|
||||
// TMP = LHS;
|
||||
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
|
||||
const Expr *PrivateExpr = Privates[I];
|
||||
const Expr *TempExpr = CopyArrayTemps[I];
|
||||
EmitAutoVarDecl(
|
||||
*cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
|
||||
LValue DestLVal = EmitLValue(TempExpr);
|
||||
LValue SrcLVal = EmitLValue(LHSs[I]);
|
||||
EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
|
||||
SrcLVal.getAddress(*this),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
|
||||
CopyOps[I]);
|
||||
}
|
||||
}
|
||||
CGM.getOpenMPRuntime().emitReduction(
|
||||
*this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
|
||||
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
|
||||
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
|
||||
const Expr *PrivateExpr = Privates[I];
|
||||
LValue DestLVal;
|
||||
LValue SrcLVal;
|
||||
if (IsInclusive) {
|
||||
DestLVal = EmitLValue(RHSs[I]);
|
||||
SrcLVal = EmitLValue(LHSs[I]);
|
||||
} else {
|
||||
const Expr *TempExpr = CopyArrayTemps[I];
|
||||
DestLVal = EmitLValue(RHSs[I]);
|
||||
SrcLVal = EmitLValue(TempExpr);
|
||||
}
|
||||
EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
|
||||
SrcLVal.getAddress(*this),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
|
||||
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
|
||||
CopyOps[I]);
|
||||
}
|
||||
}
|
||||
EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
|
||||
OMPScanExitBlock = IsInclusive
|
||||
? BreakContinueStack.back().ContinueBlock.getBlock()
|
||||
: OMPScanReduce;
|
||||
EmitBlock(OMPAfterScanBlock);
|
||||
return;
|
||||
}
|
||||
if (!IsInclusive) {
|
||||
EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
|
||||
EmitBlock(OMPScanExitBlock);
|
||||
|
@ -6313,6 +6421,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
|
|||
}
|
||||
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
|
||||
(void)GlobalsScope.Privatize();
|
||||
ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
|
||||
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
|
||||
} else {
|
||||
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
|
||||
|
|
|
@ -15150,24 +15150,37 @@ static bool actOnOMPReductionKindClause(
|
|||
S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true);
|
||||
if (!CopyOpRes.isUsable())
|
||||
continue;
|
||||
// Build temp array for prefix sum.
|
||||
auto *Dim = new (S.Context)
|
||||
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
|
||||
QualType ArrayTy =
|
||||
S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
|
||||
/*IndexTypeQuals=*/0, {ELoc, ELoc});
|
||||
VarDecl *TempArrayVD =
|
||||
buildVarDecl(S, ELoc, ArrayTy, D->getName(),
|
||||
D->hasAttrs() ? &D->getAttrs() : nullptr);
|
||||
// Add a constructor to the temp decl.
|
||||
S.ActOnUninitializedDecl(TempArrayVD);
|
||||
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
|
||||
TempArrayElem =
|
||||
S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
|
||||
auto *Idx = new (S.Context)
|
||||
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
|
||||
TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
|
||||
ELoc, Idx, ELoc);
|
||||
// For simd directive and simd-based directives in simd mode no need to
|
||||
// construct temp array, need just a single temp element.
|
||||
if (Stack->getCurrentDirective() == OMPD_simd ||
|
||||
(S.getLangOpts().OpenMPSimd &&
|
||||
isOpenMPSimdDirective(Stack->getCurrentDirective()))) {
|
||||
VarDecl *TempArrayVD =
|
||||
buildVarDecl(S, ELoc, PrivateTy, D->getName(),
|
||||
D->hasAttrs() ? &D->getAttrs() : nullptr);
|
||||
// Add a constructor to the temp decl.
|
||||
S.ActOnUninitializedDecl(TempArrayVD);
|
||||
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, PrivateTy, ELoc);
|
||||
} else {
|
||||
// Build temp array for prefix sum.
|
||||
auto *Dim = new (S.Context)
|
||||
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
|
||||
QualType ArrayTy =
|
||||
S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal,
|
||||
/*IndexTypeQuals=*/0, {ELoc, ELoc});
|
||||
VarDecl *TempArrayVD =
|
||||
buildVarDecl(S, ELoc, ArrayTy, D->getName(),
|
||||
D->hasAttrs() ? &D->getAttrs() : nullptr);
|
||||
// Add a constructor to the temp decl.
|
||||
S.ActOnUninitializedDecl(TempArrayVD);
|
||||
TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc);
|
||||
TempArrayElem =
|
||||
S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get());
|
||||
auto *Idx = new (S.Context)
|
||||
OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue);
|
||||
TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(),
|
||||
ELoc, Idx, ELoc);
|
||||
}
|
||||
}
|
||||
|
||||
// OpenMP [2.15.4.6, Restrictions, p.2]
|
||||
|
|
|
@ -0,0 +1,277 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
|
||||
|
||||
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s
|
||||
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
|
||||
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
|
||||
//
|
||||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
void foo();
|
||||
void bar();
|
||||
|
||||
// CHECK-LABEL: baz
|
||||
void baz() {
|
||||
int a = 0;
|
||||
|
||||
// CHECK: store i32 0, i32* [[A_ADDR:%.+]],
|
||||
// CHECK: store i32 0, i32* [[OMP_CNT:%.+]],
|
||||
// CHECK: br label %[[OMP_HEADER:.+]]
|
||||
|
||||
// CHECK: [[OMP_HEADER]]:
|
||||
// CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
|
||||
// CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10
|
||||
// CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]]
|
||||
#pragma omp simd reduction(inscan, + : a)
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
// CHECK: [[OMP_BODY]]:
|
||||
|
||||
// i = OMP_CNT*1 + 0;
|
||||
// CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
|
||||
// CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1
|
||||
// CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]],
|
||||
|
||||
// A_PRIV = 0;
|
||||
// CHECK: store i32 0, i32* [[A_PRIV_ADDR:%.+]],
|
||||
|
||||
// goto DISPATCH;
|
||||
// CHECK: br label %[[DISPATCH:[^,]+]]
|
||||
|
||||
// INPUT_PHASE:
|
||||
// foo();
|
||||
// goto REDUCE;
|
||||
// CHECK: [[INPUT_PHASE:.+]]:
|
||||
// CHECK: call void @{{.*}}foo{{.*}}()
|
||||
// CHECK: br label %[[REDUCE:[^,]+]]
|
||||
foo();
|
||||
|
||||
// DISPATCH:
|
||||
// goto INPUT_PHASE;
|
||||
// CHECK: [[DISPATCH]]:
|
||||
// CHECK: br label %[[INPUT_PHASE]]
|
||||
|
||||
// REDUCE:
|
||||
// A = A_PRIV + A;
|
||||
// A_PRIV = A;
|
||||
// goto SCAN_PHASE;
|
||||
// CHECK: [[REDUCE]]:
|
||||
// CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]],
|
||||
// CHECK: [[A_PRIV:%.+]] = load i32, i32* [[A_PRIV_ADDR]],
|
||||
// CHECK: [[SUM:%.+]] = add nsw i32 [[A]], [[A_PRIV]]
|
||||
// CHECK: store i32 [[SUM]], i32* [[A_ADDR]],
|
||||
// CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]],
|
||||
// CHECK: store i32 [[A]], i32* [[A_PRIV_ADDR]],
|
||||
// CHECK: br label %[[SCAN_PHASE:[^,]+]]
|
||||
#pragma omp scan inclusive(a)
|
||||
|
||||
// SCAN_PHASE:
|
||||
// bar();
|
||||
// goto CONTINUE;
|
||||
// CHECK: [[SCAN_PHASE]]:
|
||||
// CHECK: call void @{{.*}}bar{{.*}}()
|
||||
// CHECK: br label %[[CONTINUE:[^,]+]]
|
||||
bar();
|
||||
|
||||
// CHECK: [[CONTINUE]]:
|
||||
// CHECK: br label %[[INC_BLOCK:[^,]+]]
|
||||
|
||||
// ++OMP_CNT;
|
||||
// CHECK: [[INC_BLOCK]]:
|
||||
// CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]],
|
||||
// CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1
|
||||
// CHECK: store i32 [[INC]], i32* [[OMP_CNT]],
|
||||
// CHECK: br label %[[OMP_HEADER]]
|
||||
}
|
||||
// CHECK: [[OMP_END]]:
|
||||
}
|
||||
|
||||
struct S {
|
||||
int a;
|
||||
S() {}
|
||||
~S() {}
|
||||
S& operator+(const S&);
|
||||
S& operator=(const S&);
|
||||
};
|
||||
|
||||
// CHECK-LABEL: xyz
|
||||
void xyz() {
|
||||
S s[2];
|
||||
|
||||
// CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0
|
||||
// CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2
|
||||
// CHECK: br label %[[ARRAY_INIT:.+]]
|
||||
// CHECK: [[ARRAY_INIT]]:
|
||||
// CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %{{.+}} ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ]
|
||||
// CHECK: call void [[CONSTR:@.+]](%struct.S* [[S_CUR]])
|
||||
// CHECK: [[S_NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[S_CUR]], i{{.+}} 1
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]]
|
||||
// CHECK: [[DONE]]:
|
||||
// CHECK: store i32 0, i32* [[OMP_CNT:%.+]],
|
||||
// CHECK: br label %[[OMP_HEADER:.+]]
|
||||
|
||||
// CHECK: [[OMP_HEADER]]:
|
||||
// CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
|
||||
// CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10
|
||||
// CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]]
|
||||
#pragma omp simd reduction(inscan, + : s)
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
// CHECK: [[OMP_BODY]]:
|
||||
|
||||
// i = OMP_CNT*1 + 0;
|
||||
// CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]],
|
||||
// CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1
|
||||
// CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
|
||||
// CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]],
|
||||
|
||||
// S S_PRIV[2];
|
||||
// CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0
|
||||
// CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_BEGIN]], [[S_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT:[^,]+]]
|
||||
// CHECK: [[ARRAY_INIT]]:
|
||||
// CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %[[OMP_BODY]] ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ]
|
||||
// CHECK: call void [[CONSTR]](%struct.S* [[S_CUR]])
|
||||
// CHECK: [[S_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} 1
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]]
|
||||
// CHECK: [[DONE]]:
|
||||
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_ADDR]] to %struct.S*
|
||||
// CHECK: [[RHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_PRIV_ADDR]] to %struct.S*
|
||||
|
||||
// goto DISPATCH;
|
||||
// CHECK: br label %[[DISPATCH:[^,]+]]
|
||||
|
||||
// SCAN_PHASE:
|
||||
// foo();
|
||||
// goto CONTINUE;
|
||||
// CHECK: [[SCAN_PHASE:.+]]:
|
||||
// CHECK: call void @{{.*}}foo{{.*}}()
|
||||
// CHECK: br label %[[CONTINUE:[^,]+]]
|
||||
foo();
|
||||
|
||||
// DISPATCH:
|
||||
// goto INPUT_PHASE;
|
||||
// CHECK: [[DISPATCH]]:
|
||||
// CHECK: br label %[[INPUT_PHASE:[^,]+]]
|
||||
|
||||
// REDUCE:
|
||||
// TEMP = S;
|
||||
// S = S_PRIV + S;
|
||||
// S_PRIV = TEMP;
|
||||
// goto SCAN_PHASE;
|
||||
// CHECK: [[REDUCE:.+]]:
|
||||
|
||||
// S TEMP[2];
|
||||
// CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR:%.+]], i32 0, i32 0
|
||||
// CHECK: [[TEMP_ARR_END:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2
|
||||
// CHECK: br label %[[BODY:[^,]+]]
|
||||
// CHECK: [[BODY]]:
|
||||
// CHECK: [[CUR:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %[[REDUCE]] ], [ [[NEXT:%.+]], %[[BODY]] ]
|
||||
// CHECK: call void [[CONSTR]](%struct.S* [[CUR]])
|
||||
// CHECK: [[NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[CUR]], i64 1
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[NEXT]], [[TEMP_ARR_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[EXIT:[^,]+]], label %[[BODY]]
|
||||
// CHECK: [[EXIT]]:
|
||||
|
||||
// TEMP = S;
|
||||
// CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR]], i32 0, i32 0
|
||||
// CHECK: [[TEMP_ARR_END:%.+]] = getelementptr %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2
|
||||
// CHECK: [[IS_EMPTY:%.+]] = icmp eq %struct.S* [[TEMP_ARR_BEG]], [[TEMP_ARR_END]]
|
||||
// CHECK: br i1 [[IS_EMPTY]], label %[[EXIT:[^,]+]], label %[[BODY:[^,]+]]
|
||||
// CHECK: [[BODY]]:
|
||||
// CHECK: [[CUR_SRC:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %{{.+}} ], [ [[SRC_NEXT:%.+]], %[[BODY]] ]
|
||||
// CHECK: [[CUR_DEST:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %{{.+}} ], [ [[DEST_NEXT:%.+]], %[[BODY]] ]
|
||||
// CHECK: call {{.*}}%struct.S* [[S_COPY:@.+]](%struct.S* [[CUR_DEST]], %struct.S* {{.*}}[[CUR_SRC]])
|
||||
// CHECK: [[DEST_NEXT:%.+]] = getelementptr %struct.S, %struct.S* [[CUR_DEST]], i32 1
|
||||
// CHECK: [[SRC_NEXT:%.+]] = getelementptr %struct.S, %struct.S* [[CUR_SRC]], i32 1
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[TEMP_ARR_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[EXIT]], label %[[BODY]]
|
||||
// CHECK: [[EXIT]]:
|
||||
|
||||
// S = S_PRIV + S;
|
||||
// CHECK: [[LHS_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[LHS_BEGIN]], i{{.+}} 2
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[LHS_BEGIN]], [[LHS_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY:[^,]+]]
|
||||
// CHECK: [[ARRAY_REDUCE_COPY]]:
|
||||
// CHECK: [[SRC_CUR:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[EXIT]] ], [ [[SRC_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ]
|
||||
// CHECK: [[DEST_CUR:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %[[EXIT]] ], [ [[DEST_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ]
|
||||
// CHECK: [[SUM:%.+]] = call {{.*}}%struct.S* @{{.+}}(%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SRC_CUR]])
|
||||
// CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SUM]])
|
||||
// CHECK: [[DEST_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[DEST_CUR]], i{{.+}} 1
|
||||
// CHECK: [[SRC_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[SRC_CUR]], i{{.+}} 1
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[LHS_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY]]
|
||||
// CHECK: [[DONE]]:
|
||||
|
||||
// S_PRIV = TEMP;
|
||||
// CHECK: [[TEMP_ARR_BEG:%.+]] = bitcast [2 x %struct.S]* [[TEMP_ARR]] to %struct.S*
|
||||
// CHECK: [[RHS_END:%.+]] = getelementptr %struct.S, %struct.S* [[RHS_BEGIN]], i64 2
|
||||
// CHECK: [[IS_EMPTY:%.+]] = icmp eq %struct.S* [[RHS_BEGIN]], [[RHS_END]]
|
||||
// CHECK: br i1 [[IS_EMPTY]], label %[[EXIT:[^,]+]], label %[[BODY:[^,]+]]
|
||||
// CHECK: [[BODY]]:
|
||||
// CHECK: [[CUR_SRC:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %[[DONE]] ], [ [[SRC_NEXT:%.+]], %[[BODY]] ]
|
||||
// CHECK: [[CUR_DEST:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[DONE]] ], [ [[DEST_NEXT:%.+]], %[[BODY]] ]
|
||||
// CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[CUR_DEST]], %struct.S* {{.*}}[[CUR_SRC]])
|
||||
// CHECK: [[DEST_NEXT]] = getelementptr %struct.S, %struct.S* [[CUR_DEST]], i32 1
|
||||
// CHECK: [[SRC_NEXT]] = getelementptr %struct.S, %struct.S* [[CUR_SRC]], i32 1
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[RHS_END]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:[^,]+]], label %[[BODY]]
|
||||
// CHECK: [[DONE]]:
|
||||
|
||||
// TEMP.~S()
|
||||
// CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR]], i32 0, i32 0
|
||||
// CHECK: [[TEMP_ARR_END:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2
|
||||
// CHECK: br label %[[BODY:[^,]+]]
|
||||
// CHECK: [[BODY]]:
|
||||
// CHECK: [[CUR:%.+]] = phi %struct.S* [ [[TEMP_ARR_END]], %[[DONE]] ], [ [[PREV:%.+]], %[[BODY]] ]
|
||||
// CHECK: [[PREV]] = getelementptr inbounds %struct.S, %struct.S* [[CUR]], i64 -1
|
||||
// CHECK: call void [[DESTR:@.+]](%struct.S* [[PREV]])
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[PREV]], [[TEMP_ARR_BEG]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[EXIT:[^,]+]], label %[[BODY]]
|
||||
// CHECK: [[EXIT]]:
|
||||
|
||||
// goto SCAN_PHASE;
|
||||
// CHECK: br label %[[SCAN_PHASE]]
|
||||
#pragma omp scan exclusive(s)
|
||||
|
||||
// INPUT_PHASE:
|
||||
// bar();
|
||||
// goto REDUCE;
|
||||
// CHECK: [[INPUT_PHASE]]:
|
||||
// CHECK: call void @{{.*}}bar{{.*}}()
|
||||
// CHECK: br label %[[REDUCE]]
|
||||
bar();
|
||||
|
||||
// CHECK: [[CONTINUE]]:
|
||||
|
||||
// S_PRIV[2].~S();
|
||||
// CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR]], i{{.+}} 0, i{{.+}} 0
|
||||
// CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2
|
||||
// CHECK: br label %[[ARRAY_DESTR:[^,]+]]
|
||||
// CHECK: [[ARRAY_DESTR]]:
|
||||
// CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_END]], %[[CONTINUE]] ], [ [[S_PREV:%.+]], %[[ARRAY_DESTR]] ]
|
||||
// CHECK: [[S_PREV]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} -1
|
||||
// CHECK: call void [[DESTR]](%struct.S* [[S_PREV]])
|
||||
// CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_PREV]], [[S_BEGIN]]
|
||||
// CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_DESTR]]
|
||||
// CHECK: [[DONE]]:
|
||||
// CHECK: br label %[[INC_BLOCK:[^,]+]]
|
||||
|
||||
// ++OMP_CNT;
|
||||
// CHECK: [[INC_BLOCK]]:
|
||||
// CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]],
|
||||
// CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1
|
||||
// CHECK: store i32 [[INC]], i32* [[OMP_CNT]],
|
||||
// CHECK: br label %[[OMP_HEADER]]
|
||||
}
|
||||
// CHECK: [[OMP_END]]:
|
||||
}
|
||||
|
||||
// CHECK-NOT: !{!"llvm.loop.parallel_accesses"
|
||||
|
||||
#endif // HEADER
|
Loading…
Reference in New Issue