[OMPIRBuilder] Add ordered directive to OMPBuilder

Add support for ordered directive in the OpenMPIRBuilder.

This patch also modifies clang to emit the ordered directive through the
OpenMPIRBuilder when the option -fopenmp-enable-irbuilder is enabled.

Also fix an ICE when parsing a canonical for loop that uses the relational
operator LE or GE in an OpenMP region. The range "Expr A" minus "Expr B"
was previously incremented with a unary pre-increment operation
(++(Expr A - Expr B)); it is now incremented with a binary addition of
that expression and an expression with the constant value "1"
(Expr A - Expr B + "1").

Reviewed By: Meinersbur, kiranchandramohan

Differential Revision: https://reviews.llvm.org/D107430
This commit is contained in:
PeixinQiao 2021-09-03 09:37:58 +08:00
parent 91eda9c30f
commit a42380ce83
9 changed files with 788 additions and 110 deletions

View File

@ -5312,6 +5312,74 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
if (CGM.getLangOpts().OpenMPIRBuilder) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
if (S.hasClausesOfKind<OMPDependClause>()) {
// The ordered directive with depend clause.
assert(!S.hasAssociatedStmt() &&
"No associated statement must be in ordered depend construct.");
InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
AllocaInsertPt->getIterator());
for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) {
unsigned NumLoops = DC->getNumLoops();
QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(
/*DestWidth=*/64, /*Signed=*/1);
llvm::SmallVector<llvm::Value *> StoreValues;
for (unsigned I = 0; I < NumLoops; I++) {
const Expr *CounterVal = DC->getLoopData(I);
assert(CounterVal);
llvm::Value *StoreValue = EmitScalarConversion(
EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
CounterVal->getExprLoc());
StoreValues.emplace_back(StoreValue);
}
bool IsDependSource = false;
if (DC->getDependencyKind() == OMPC_DEPEND_source)
IsDependSource = true;
Builder.restoreIP(OMPBuilder.createOrderedDepend(
Builder, AllocaIP, NumLoops, StoreValues, ".cnt.addr",
IsDependSource));
}
} else {
// The ordered directive with threads or simd clause, or without clause.
// Without clause, it behaves as if the threads clause is specified.
const auto *C = S.getSingleClause<OMPSIMDClause>();
auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
};
auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
InsertPointTy CodeGenIP,
llvm::BasicBlock &FiniBB) {
const CapturedStmt *CS = S.getInnermostCapturedStmt();
if (C) {
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
GenerateOpenMPCapturedVars(*CS, CapturedVars);
llvm::Function *OutlinedFn =
emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
assert(S.getBeginLoc().isValid() &&
"Outlined function call location must be valid.");
ApplyDebugLocation::CreateDefaultArtificial(*this, S.getBeginLoc());
OMPBuilderCBHelpers::EmitCaptureStmt(*this, CodeGenIP, FiniBB,
OutlinedFn, CapturedVars);
} else {
OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
FiniBB);
OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CS->getCapturedStmt(),
CodeGenIP, FiniBB);
}
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
Builder.restoreIP(
OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
}
return;
}
if (S.hasClausesOfKind<OMPDependClause>()) {
assert(!S.hasAssociatedStmt() &&
"No associated statement must be in ordered depend construct.");

View File

@ -1775,6 +1775,24 @@ public:
CGF.Builder.CreateBr(&FiniBB);
}
/// Emit a call to \p Fn with arguments \p Args into the block referenced by
/// \p CodeGenIP, then branch to \p FiniBB.
///
/// Any terminator already present in that block is erased first so the call
/// can be appended at the end of the block. The non-throwing call helper is
/// used when \p Fn reports that it does not throw. The final branch is only
/// created if the builder still has an insertion point after the call.
static void EmitCaptureStmt(CodeGenFunction &CGF, InsertPointTy CodeGenIP,
                            llvm::BasicBlock &FiniBB, llvm::Function *Fn,
                            ArrayRef<llvm::Value *> Args) {
  llvm::BasicBlock *TargetBB = CodeGenIP.getBlock();

  // Remove the existing terminator (if any) before appending to the block.
  llvm::Instruction *OldTerminator = TargetBB->getTerminator();
  if (OldTerminator != nullptr)
    OldTerminator->eraseFromParent();
  CGF.Builder.SetInsertPoint(TargetBB);

  // Pick the call flavour based on whether the callee may throw.
  const bool CalleeIsNoThrow = Fn->doesNotThrow();
  if (!CalleeIsNoThrow)
    CGF.EmitRuntimeCall(Fn, Args);
  else
    CGF.EmitNounwindRuntimeCall(Fn, Args);

  // Without a valid insertion point there is nowhere to place the branch.
  if (!CGF.Builder.saveIP().isSet())
    return;
  CGF.Builder.CreateBr(&FiniBB);
}
/// RAII for preserving necessary info during Outlined region body codegen.
class OutlinedRegionBodyRAII {

View File

@ -5320,8 +5320,9 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy,
if (Rel == BO_LE || Rel == BO_GE) {
// Add one to the range if the relational operator is inclusive.
Range =
AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range));
Range = AssertSuccess(Actions.BuildBinOp(
nullptr, {}, BO_Add, Range,
Actions.ActOnIntegerConstant(SourceLocation(), 1).get()));
}
// Divide by the absolute step amount.

View File

@ -1,11 +1,19 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-NORMAL
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-NORMAL
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefix=CHECK3
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-NORMAL
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
@ -128,7 +136,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -137,9 +145,12 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK1-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -157,6 +168,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@ -188,6 +200,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@ -195,7 +208,9 @@ void foo_simd(int low, int up) {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@ -213,7 +228,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -222,9 +237,11 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -243,6 +260,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@ -270,6 +288,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@ -277,7 +296,9 @@ void foo_simd(int low, int up) {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@ -303,7 +324,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[X9:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -336,9 +357,11 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -369,6 +392,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK1-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK1-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@ -400,6 +424,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK1-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@ -409,7 +434,9 @@ void foo_simd(int low, int up) {
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@ -430,7 +457,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[X2:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -440,9 +467,11 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -470,6 +499,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK1-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@ -501,6 +531,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@ -508,7 +539,9 @@ void foo_simd(int low, int up) {
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@ -535,7 +568,7 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK1-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@ -622,9 +655,11 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -632,13 +667,15 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK1: omp.inner.for.cond29:
// CHECK1-IRBUILDER: omp.inner.for.cond30:
// CHECK1-NORMAL: omp.inner.for.cond29:
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK1: omp.inner.for.body32:
// CHECK1-IRBUILDER: omp.inner.for.body33:
// CHECK1-NORMAL: omp.inner.for.body32:
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@ -650,15 +687,19 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
// CHECK1-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK1: omp.body.continue37:
// CHECK1-IRBUILDER: omp.body.continue38:
// CHECK1-NORMAL: omp.body.continue37:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK1: omp.inner.for.inc38:
// CHECK1-IRBUILDER: omp.inner.for.inc39:
// CHECK1-NORMAL: omp.inner.for.inc38:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK1-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1: omp.inner.for.end40:
// CHECK1-IRBUILDER: omp.inner.for.end42:
// CHECK1-NORMAL: omp.inner.for.end40:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
@ -681,7 +722,9 @@ void foo_simd(int low, int up) {
// CHECK1: .omp.final.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@ -725,7 +768,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -734,9 +777,12 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK2-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -754,6 +800,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@ -785,6 +832,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@ -792,7 +840,9 @@ void foo_simd(int low, int up) {
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@ -810,7 +860,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -819,9 +869,11 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -840,6 +892,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@ -867,6 +920,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@ -874,7 +928,9 @@ void foo_simd(int low, int up) {
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@ -900,7 +956,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[X9:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -933,9 +989,11 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -966,6 +1024,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK2-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK2-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@ -997,6 +1056,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK2-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK2-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@ -1006,7 +1066,9 @@ void foo_simd(int low, int up) {
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@ -1027,7 +1089,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[X2:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -1037,9 +1099,11 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1067,6 +1131,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK2-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@ -1098,6 +1163,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@ -1105,7 +1171,9 @@ void foo_simd(int low, int up) {
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@ -1132,7 +1200,7 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I28:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK2-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@ -1219,9 +1287,11 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1229,13 +1299,15 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK2: omp.inner.for.cond29:
// CHECK2-IRBUILDER: omp.inner.for.cond30:
// CHECK2-NORMAL: omp.inner.for.cond29:
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK2-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK2-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK2: omp.inner.for.body32:
// CHECK2-IRBUILDER: omp.inner.for.body33:
// CHECK2-NORMAL: omp.inner.for.body32:
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@ -1247,15 +1319,19 @@ void foo_simd(int low, int up) {
// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
// CHECK2-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK2: omp.body.continue37:
// CHECK2-IRBUILDER: omp.body.continue38:
// CHECK2-NORMAL: omp.body.continue37:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK2: omp.inner.for.inc38:
// CHECK2-IRBUILDER: omp.inner.for.inc39:
// CHECK2-NORMAL: omp.inner.for.inc38:
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK2-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK2: omp.inner.for.end40:
// CHECK2-IRBUILDER: omp.inner.for.end42:
// CHECK2-NORMAL: omp.inner.for.end40:
// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
@ -1278,7 +1354,9 @@ void foo_simd(int low, int up) {
// CHECK2: .omp.final.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@ -1322,7 +1400,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -1331,9 +1409,12 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK3-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1351,6 +1432,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@ -1382,6 +1464,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@ -1389,7 +1472,9 @@ void foo_simd(int low, int up) {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@ -1407,7 +1492,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -1416,9 +1501,11 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1437,6 +1524,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK3-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@ -1464,6 +1552,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@ -1471,7 +1560,9 @@ void foo_simd(int low, int up) {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@ -1497,7 +1588,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK3-NEXT: [[X9:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -1530,9 +1621,11 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1563,6 +1656,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK3-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@ -1594,6 +1688,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK3-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK3-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@ -1603,7 +1698,9 @@ void foo_simd(int low, int up) {
// CHECK3: omp.dispatch.end:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@ -1624,7 +1721,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK3-NEXT: [[X2:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -1634,9 +1731,11 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1664,6 +1763,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK3-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@ -1695,6 +1795,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@ -1702,7 +1803,9 @@ void foo_simd(int low, int up) {
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@ -1729,7 +1832,7 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I28:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK3-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@ -1816,9 +1919,11 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1826,13 +1931,15 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK3: omp.inner.for.cond29:
// CHECK3-IRBUILDER: omp.inner.for.cond30:
// CHECK3-NORMAL: omp.inner.for.cond29:
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK3: omp.inner.for.body32:
// CHECK3-IRBUILDER: omp.inner.for.body33:
// CHECK3-NORMAL: omp.inner.for.body32:
// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@ -1844,15 +1951,19 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
// CHECK3-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK3: omp.body.continue37:
// CHECK3-IRBUILDER: omp.body.continue38:
// CHECK3-NORMAL: omp.body.continue37:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK3: omp.inner.for.inc38:
// CHECK3-IRBUILDER: omp.inner.for.inc39:
// CHECK3-NORMAL: omp.inner.for.inc38:
// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK3-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK3: omp.inner.for.end40:
// CHECK3-IRBUILDER: omp.inner.for.end42:
// CHECK3-NORMAL: omp.inner.for.end40:
// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
@ -1875,7 +1986,9 @@ void foo_simd(int low, int up) {
// CHECK3: .omp.final.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@ -1919,7 +2032,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -1928,9 +2041,12 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK4-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -1948,6 +2064,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@ -1979,6 +2096,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@ -1986,7 +2104,9 @@ void foo_simd(int low, int up) {
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK4: omp.dispatch.end:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@ -2004,7 +2124,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -2013,9 +2133,11 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -2034,6 +2156,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK4-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@ -2061,6 +2184,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@ -2068,7 +2192,9 @@ void foo_simd(int low, int up) {
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK4: omp.dispatch.end:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@ -2094,7 +2220,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK4-NEXT: [[X9:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -2127,9 +2253,11 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -2160,6 +2288,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK4-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK4-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@ -2191,6 +2320,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK4-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK4-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@ -2200,7 +2330,9 @@ void foo_simd(int low, int up) {
// CHECK4: omp.dispatch.end:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@ -2221,7 +2353,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK4-NEXT: [[X2:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@ -2231,9 +2363,11 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -2261,6 +2395,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK4-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@ -2292,6 +2427,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@ -2299,7 +2435,9 @@ void foo_simd(int low, int up) {
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK4: omp.dispatch.end:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@ -2326,7 +2464,7 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I28:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK4-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@ -2413,9 +2551,11 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK4-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@ -2423,13 +2563,15 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK4: omp.inner.for.cond29:
// CHECK4-IRBUILDER: omp.inner.for.cond30:
// CHECK4-NORMAL: omp.inner.for.cond29:
// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK4-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK4-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK4: omp.inner.for.body32:
// CHECK4-IRBUILDER: omp.inner.for.body33:
// CHECK4-NORMAL: omp.inner.for.body32:
// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@ -2441,15 +2583,19 @@ void foo_simd(int low, int up) {
// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
// CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK4: omp.body.continue37:
// CHECK4-IRBUILDER: omp.body.continue38:
// CHECK4-NORMAL: omp.body.continue37:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK4: omp.inner.for.inc38:
// CHECK4-IRBUILDER: omp.inner.for.inc39:
// CHECK4-NORMAL: omp.inner.for.inc38:
// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK4: omp.inner.for.end40:
// CHECK4-IRBUILDER: omp.inner.for.end42:
// CHECK4-NORMAL: omp.inner.for.end40:
// CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
@ -2472,7 +2618,9 @@ void foo_simd(int low, int up) {
// CHECK4: .omp.final.done:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//

View File

@ -1,6 +1,10 @@
// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-pch -o %t %s
@ -20,7 +24,7 @@ void foo();
int main() {
int i;
// CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK: icmp
// CHECK-NEXT: br i1 %
// CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8*
@ -32,13 +36,13 @@ int main() {
// CHECK: store i64 1, i64* %
// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
#pragma omp for ordered(1)
for (i = 0; i < n; ++i) {
a[i] = b[i] + 1;
foo();
// CHECK: call void [[FOO:.+]](
// CHECK: call void (...) [[FOO:.+]](
// CHECK: load i32, i32* [[I:%.+]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 1
@ -46,11 +50,13 @@ int main() {
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID1:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID1]], i64* [[TMP]])
#pragma omp ordered depend(source)
c[i] = c[i] + 1;
foo();
// CHECK: call void [[FOO]]
// CHECK: call void (...) [[FOO]]
// CHECK: load i32, i32* [[I]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 2
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
@ -59,12 +65,14 @@ int main() {
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID2:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID2]], i64* [[TMP]])
#pragma omp ordered depend(sink : i - 2)
d[i] = a[i - 2];
}
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: ret i32 0
return 0;
}

View File

@ -1,6 +1,10 @@
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
@ -37,7 +41,7 @@ void bar() {
int main() {
int i;
// CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK: icmp
// CHECK-NEXT: br i1 %
// CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8*
@ -49,8 +53,8 @@ int main() {
// CHECK: store i64 1, i64* %
// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
#pragma omp for ordered(1)
for (int i = 0; i < n; ++i) {
a[i] = b[i] + 1;
@ -63,7 +67,9 @@ int main() {
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID18:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID18]], i64* [[TMP]])
#pragma omp ordered depend(source)
c[i] = c[i] + 1;
foo();
@ -76,16 +82,18 @@ int main() {
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID30:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID30]], i64* [[TMP]])
#pragma omp ordered depend(sink : i - 2)
d[i] = a[i - 2];
}
// CHECK: landingpad
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: br label %
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: ret i32 0
return 0;
}
@ -93,7 +101,7 @@ int main() {
// CHECK-LABEL: main1
int main1() {
// CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK: icmp
// CHECK-NEXT: br i1 %
// CHECK: [[CAST:%.+]] = bitcast [1 x [[KMP_DIM]]]* [[DIMS]] to i8*
@ -105,8 +113,8 @@ int main1() {
// CHECK: store i64 1, i64* %
// CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], [1 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 1, i8* [[CAST]])
// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
#pragma omp for ordered(1)
for (int i = n; i > 0; --i) {
a[i] = b[i] + 1;
@ -120,7 +128,9 @@ int main1() {
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID17:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID17]], i64* [[TMP]])
#pragma omp ordered depend(source)
c[i] = c[i] + 1;
foo();
@ -134,16 +144,18 @@ int main1() {
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], [1 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID29:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID29]], i64* [[TMP]])
#pragma omp ordered depend(sink : i - 2)
d[i] = a[i - 2];
}
// CHECK: landingpad
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: br label %
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: ret i32 0
return 0;
}
@ -161,7 +173,7 @@ struct TestStruct {
void baz(T, T);
TestStruct() {
// CHECK: [[DIMS:%.+]] = alloca [2 x [[KMP_DIM]]],
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK: [[CAST:%.+]] = bitcast [2 x [[KMP_DIM]]]* [[DIMS]] to i8*
// CHECK: call void @llvm.memset.p0i8.i64(i8* align 8 [[CAST]], i8 0, i64 48, i1 false)
// CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
@ -176,8 +188,8 @@ struct TestStruct {
// CHECK: store i64 1, i64* %
// CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], [2 x [[KMP_DIM]]]* [[DIMS]], i64 0, i64 0
// CHECK: [[CAST:%.+]] = bitcast [[KMP_DIM]]* [[DIM]] to i8*
// CHECK: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 2, i8* [[CAST]])
// CHECK: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
// CHECK-NORMAL: call void @__kmpc_doacross_init([[IDENT]], i32 [[GTID]], i32 2, i8* [[CAST]])
// CHECK-NORMAL: call void @__kmpc_for_static_init_4(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 33, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
#pragma omp for ordered(2)
for (T j = 0; j < M; j++)
for (i = 0; i < n; i += 2) {
@ -190,34 +202,42 @@ struct TestStruct {
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
// CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NORMAL-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
// CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF:%.+]],
// CHECK-NEXT: load i32, i32* [[I]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 2
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 2
// CHECK-NEXT: sext i32 %{{.+}} to i64
// CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID18:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID18]], i64* [[TMP]])
// CHECK-NEXT: load i32, i32* [[J:%.+]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 1
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
// CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NORMAL-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF]],
// CHECK-NEXT: load i32, i32* [[I]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 2
// CHECK-NEXT: sext i32 %{{.+}} to i64
// CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID27:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait([[IDENT]], i32 [[GTID27]], i64* [[TMP]])
#pragma omp ordered depend(sink : j, i - 2) depend(sink : j - 1, i)
b[i][j] = bar(a[i][j], b[i - 1][j], b[i][j - 1]);
// CHECK: invoke {{.+TestStruct.+bar}}
@ -228,27 +248,31 @@ struct TestStruct {
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 1
// CHECK-NEXT: sext i32 %{{.+}} to i64
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP:%.+]],
// CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-NORMAL-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[I:%.+]] = load i32*, i32** [[I_REF]],
// CHECK-NEXT: load i32, i32* [[I]],
// CHECK-NEXT: sub nsw i32 %{{.+}}, 0
// CHECK-NEXT: sdiv i32 %{{.+}}, 2
// CHECK-NEXT: sext i32 %{{.+}} to i64
// CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT:%.+]], i64 0, i64 0
// CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 1
// CHECK-NEXT: store i64 %{{.+}}, i64* [[TMP]],
// CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* [[CNT]], i64 0, i64 0
// CHECK-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID]], i64* [[TMP]])
// CHECK-IRBUILDER-NEXT: [[GTID58:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT:%.+]])
// CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post([[IDENT]], i32 [[GTID58]], i64* [[TMP]])
#pragma omp ordered depend(source)
baz(a[i][j], b[i][j]);
}
}
// CHECK: landingpad
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: br label %
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK-NORMAL: call void @__kmpc_doacross_fini([[IDENT]], i32 [[GTID]])
// CHECK: ret
};

View File

@ -838,6 +838,35 @@ public:
FinalizeCallbackTy FiniCB,
StringRef CriticalName, Value *HintInst);
/// Generator for '#omp ordered depend (source | sink)'
///
/// Allocates a depend vector of \p NumLoops 64-bit integers at \p AllocaIP,
/// stores \p StoreValues into it slot by slot at \p Loc, and then emits a
/// call to __kmpc_doacross_post (depend source) or __kmpc_doacross_wait
/// (depend sink) that receives the address of the vector's first element.
///
/// \param Loc The insert and source location description.
/// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param NumLoops The number of loops in depend clause; this is also the
///                 number of elements in the allocated depend vector.
/// \param StoreValues The values written into the depend vector; entry I is
///                    stored to slot I for every I below \p NumLoops.
/// \param Name The name of alloca instruction.
/// \param IsDependSource If true, depend source; otherwise, depend sink.
///
/// \return The insertion position *after* the ordered.
InsertPointTy createOrderedDepend(const LocationDescription &Loc,
InsertPointTy AllocaIP, unsigned NumLoops,
ArrayRef<llvm::Value *> StoreValues,
const Twine &Name, bool IsDependSource);
/// Generator for '#omp ordered [threads | simd]'
///
/// When \p IsThreads is true, calls to __kmpc_ordered and
/// __kmpc_end_ordered are created and handed to the inlined-region emitter
/// as the region's entry and exit calls. When it is false, no entry or exit
/// runtime call is created for the region.
///
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finalize variable copies.
/// \param IsThreads If true, with threads clause or without clause;
/// otherwise, with simd clause;
///
/// \returns The insertion position *after* the ordered.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB,
bool IsThreads);
/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.

View File

@ -2145,6 +2145,74 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
/*Conditional*/ false, /*hasFinalize*/ true);
}
/// Emit the code for '#omp ordered depend(source)' / 'depend(sink : vec)'.
///
/// An array of \p NumLoops i64 slots named \p Name is allocated at
/// \p AllocaIP. At \p Loc, StoreValues[I] is stored into slot I, and the
/// address of slot 0 is passed, together with the ident and thread id, to
/// __kmpc_doacross_post (\p IsDependSource true) or __kmpc_doacross_wait
/// (\p IsDependSource false).
///
/// \returns the insertion point right after the runtime call, or Loc.IP
/// unchanged when updateToLocation(Loc) fails.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
                                     InsertPointTy AllocaIP, unsigned NumLoops,
                                     ArrayRef<llvm::Value *> StoreValues,
                                     const Twine &Name, bool IsDependSource) {
  // The store loop below reads StoreValues[0 .. NumLoops-1]; make that
  // precondition explicit instead of relying on the container to catch it.
  assert(StoreValues.size() >= NumLoops &&
         "Expected a store value for every loop of the depend vector");

  if (!updateToLocation(Loc))
    return Loc.IP;

  // Allocate space for vector and generate alloc instruction.
  auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
  Builder.restoreIP(AllocaIP);
  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
  ArgsBase->setAlignment(Align(8));
  Builder.restoreIP(Loc.IP);

  // Store the index value with offset in depend vector.
  for (unsigned I = 0; I < NumLoops; ++I) {
    // The vector slots are i64, so every stored value has to be i64 as well.
    assert(StoreValues[I]->getType()->isIntegerTy(64) &&
           "OpenMP runtime requires depend vec with i64 type");
    Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
        ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
    Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
  }

  // The runtime entry points take the address of the first vector element.
  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
      ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};

  // depend(source) posts the current iteration; depend(sink) waits on it.
  Function *RTLFn = nullptr;
  if (IsDependSource)
    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
  else
    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
  Builder.CreateCall(RTLFn, Args);

  return Builder.saveIP();
}
/// Emit an inlined region for '#omp ordered [threads | simd]'.
///
/// For the threads form, __kmpc_ordered and __kmpc_end_ordered calls are
/// created (both taking the ident and the thread id) and passed on as the
/// region's entry and exit calls. For the simd form both stay null.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, bool IsThreads) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Instruction *OrderedBegin = nullptr;
  Instruction *OrderedEnd = nullptr;

  if (IsThreads) {
    // Both runtime entry points share the same (ident, thread id) arguments.
    Value *LocIdent = getOrCreateIdent(getOrCreateSrcLocStr(Loc));
    Value *Tid = getOrCreateThreadID(LocIdent);
    Value *RuntimeArgs[] = {LocIdent, Tid};

    Function *BeginFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
    OrderedBegin = Builder.CreateCall(BeginFn, RuntimeArgs);

    Function *EndFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
    OrderedEnd = Builder.CreateCall(EndFn, RuntimeArgs);
  }

  return EmitOMPInlinedRegion(Directive::OMPD_ordered, OrderedBegin,
                              OrderedEnd, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,

View File

@ -2120,6 +2120,320 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy);
}
// Checks the IR emitted for "#omp ordered depend(source)": an alloca for the
// depend vector, one GEP + store per loop, a GEP to the vector base, the
// thread-id query, and finally the __kmpc_doacross_post call.
TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSource) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);
  LLVMContext &Ctx = M->getContext();

  InsertPointTy AllocaIP(&F->getEntryBlock(),
                         F->getEntryBlock().getFirstInsertionPt());

  unsigned NumLoops = 2;
  SmallVector<Value *, 2> StoreValues;
  Type *LCTy = Type::getInt64Ty(Ctx);
  StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
  StoreValues.emplace_back(ConstantInt::get(LCTy, 2));

  // Test for "#omp ordered depend(source)"
  Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
                                                   StoreValues, ".cnt.addr",
                                                   /*IsDependSource=*/true));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // The depend vector must be the first instruction: [NumLoops x i64].
  AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
  ASSERT_NE(AllocInst, nullptr);
  ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
  // Fatal check: ArrType is dereferenced right below.
  ASSERT_NE(ArrType, nullptr);
  EXPECT_EQ(ArrType->getNumElements(), NumLoops);
  EXPECT_TRUE(
      AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));

  // Each loop contributes a GEP to slot Iter followed by a store to it.
  Instruction *IterInst = AllocInst;
  for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
    GetElementPtrInst *DependAddrGEPIter =
        dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
    ASSERT_NE(DependAddrGEPIter, nullptr);
    EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
    EXPECT_EQ(DependAddrGEPIter->getNumIndices(), 2U);
    auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
    auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
    ASSERT_NE(FirstIdx, nullptr);
    ASSERT_NE(SecondIdx, nullptr);
    EXPECT_EQ(FirstIdx->getValue(), 0);
    EXPECT_EQ(SecondIdx->getValue(), Iter);
    StoreInst *StoreValue =
        dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
    ASSERT_NE(StoreValue, nullptr);
    EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
    EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
    IterInst = StoreValue;
  }

  // After the stores comes the GEP to element 0 that is passed to the runtime.
  GetElementPtrInst *DependBaseAddrGEP =
      dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
  ASSERT_NE(DependBaseAddrGEP, nullptr);
  EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
  EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), 2U);
  auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
  auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
  ASSERT_NE(FirstIdx, nullptr);
  ASSERT_NE(SecondIdx, nullptr);
  EXPECT_EQ(FirstIdx->getValue(), 0);
  EXPECT_EQ(SecondIdx->getValue(), 0);

  CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
  ASSERT_NE(GTID, nullptr);
  EXPECT_EQ(GTID->getNumArgOperands(), 1U);
  // getCalledFunction() is null for indirect calls; fail cleanly in that case.
  ASSERT_NE(GTID->getCalledFunction(), nullptr);
  EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
  EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
  EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());

  CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
  ASSERT_NE(Depend, nullptr);
  EXPECT_EQ(Depend->getNumArgOperands(), 3U);
  ASSERT_NE(Depend->getCalledFunction(), nullptr);
  EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_post");
  EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
  EXPECT_EQ(Depend->getArgOperand(1), GTID);
  EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
}
// Checks the IR emitted for "#omp ordered depend(sink: vec)": an alloca for
// the depend vector, one GEP + store per loop, a GEP to the vector base, the
// thread-id query, and finally the __kmpc_doacross_wait call.
TEST_F(OpenMPIRBuilderTest, OrderedDirectiveDependSink) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);
  LLVMContext &Ctx = M->getContext();

  InsertPointTy AllocaIP(&F->getEntryBlock(),
                         F->getEntryBlock().getFirstInsertionPt());

  unsigned NumLoops = 2;
  SmallVector<Value *, 2> StoreValues;
  Type *LCTy = Type::getInt64Ty(Ctx);
  StoreValues.emplace_back(ConstantInt::get(LCTy, 1));
  StoreValues.emplace_back(ConstantInt::get(LCTy, 2));

  // Test for "#omp ordered depend(sink: vec)"
  Builder.restoreIP(OMPBuilder.createOrderedDepend(Builder, AllocaIP, NumLoops,
                                                   StoreValues, ".cnt.addr",
                                                   /*IsDependSource=*/false));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // The depend vector must be the first instruction: [NumLoops x i64].
  AllocaInst *AllocInst = dyn_cast<AllocaInst>(&BB->front());
  ASSERT_NE(AllocInst, nullptr);
  ArrayType *ArrType = dyn_cast<ArrayType>(AllocInst->getAllocatedType());
  // Fatal check: ArrType is dereferenced right below.
  ASSERT_NE(ArrType, nullptr);
  EXPECT_EQ(ArrType->getNumElements(), NumLoops);
  EXPECT_TRUE(
      AllocInst->getAllocatedType()->getArrayElementType()->isIntegerTy(64));

  // Each loop contributes a GEP to slot Iter followed by a store to it.
  Instruction *IterInst = AllocInst;
  for (unsigned Iter = 0; Iter < NumLoops; Iter++) {
    GetElementPtrInst *DependAddrGEPIter =
        dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
    ASSERT_NE(DependAddrGEPIter, nullptr);
    EXPECT_EQ(DependAddrGEPIter->getPointerOperand(), AllocInst);
    EXPECT_EQ(DependAddrGEPIter->getNumIndices(), 2U);
    auto *FirstIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(1));
    auto *SecondIdx = dyn_cast<ConstantInt>(DependAddrGEPIter->getOperand(2));
    ASSERT_NE(FirstIdx, nullptr);
    ASSERT_NE(SecondIdx, nullptr);
    EXPECT_EQ(FirstIdx->getValue(), 0);
    EXPECT_EQ(SecondIdx->getValue(), Iter);
    StoreInst *StoreValue =
        dyn_cast<StoreInst>(DependAddrGEPIter->getNextNode());
    ASSERT_NE(StoreValue, nullptr);
    EXPECT_EQ(StoreValue->getValueOperand(), StoreValues[Iter]);
    EXPECT_EQ(StoreValue->getPointerOperand(), DependAddrGEPIter);
    IterInst = StoreValue;
  }

  // After the stores comes the GEP to element 0 that is passed to the runtime.
  GetElementPtrInst *DependBaseAddrGEP =
      dyn_cast<GetElementPtrInst>(IterInst->getNextNode());
  ASSERT_NE(DependBaseAddrGEP, nullptr);
  EXPECT_EQ(DependBaseAddrGEP->getPointerOperand(), AllocInst);
  EXPECT_EQ(DependBaseAddrGEP->getNumIndices(), 2U);
  auto *FirstIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(1));
  auto *SecondIdx = dyn_cast<ConstantInt>(DependBaseAddrGEP->getOperand(2));
  ASSERT_NE(FirstIdx, nullptr);
  ASSERT_NE(SecondIdx, nullptr);
  EXPECT_EQ(FirstIdx->getValue(), 0);
  EXPECT_EQ(SecondIdx->getValue(), 0);

  CallInst *GTID = dyn_cast<CallInst>(DependBaseAddrGEP->getNextNode());
  ASSERT_NE(GTID, nullptr);
  EXPECT_EQ(GTID->getNumArgOperands(), 1U);
  // getCalledFunction() is null for indirect calls; fail cleanly in that case.
  ASSERT_NE(GTID->getCalledFunction(), nullptr);
  EXPECT_EQ(GTID->getCalledFunction()->getName(), "__kmpc_global_thread_num");
  EXPECT_FALSE(GTID->getCalledFunction()->doesNotAccessMemory());
  EXPECT_FALSE(GTID->getCalledFunction()->doesNotFreeMemory());

  CallInst *Depend = dyn_cast<CallInst>(GTID->getNextNode());
  ASSERT_NE(Depend, nullptr);
  EXPECT_EQ(Depend->getNumArgOperands(), 3U);
  ASSERT_NE(Depend->getCalledFunction(), nullptr);
  EXPECT_EQ(Depend->getCalledFunction()->getName(), "__kmpc_doacross_wait");
  EXPECT_TRUE(isa<GlobalVariable>(Depend->getArgOperand(0)));
  EXPECT_EQ(Depend->getArgOperand(1), GTID);
  EXPECT_EQ(Depend->getArgOperand(2), DependBaseAddrGEP);
}
// Checks that "#omp ordered [threads]" emits matching __kmpc_ordered and
// __kmpc_end_ordered calls that share the same thread-id argument.
TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  AllocaInst *PrivAI =
      Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");

  BasicBlock *EntryBB = nullptr;

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &FiniBB) {
    EntryBB = FiniBB.getUniquePredecessor();

    llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
    llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
    EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
    EXPECT_EQ(EntryBB, CodeGenIPBB);

    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(F->arg_begin(), PrivAI);
    Value *PrivLoad =
        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
    Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
  };

  auto FiniCB = [&](InsertPointTy IP) {
    BasicBlock *IPBB = IP.getBlock();
    EXPECT_NE(IPBB->end(), IP.getPoint());
  };

  // Returns the first direct call to the function named Callee in Block, or
  // nullptr if there is none. Indirect calls are skipped rather than
  // dereferenced.
  auto FindCallTo = [](BasicBlock &Block, StringRef Callee) -> CallInst * {
    for (Instruction &I : Block)
      if (auto *CI = dyn_cast<CallInst>(&I))
        if (Function *Fn = CI->getCalledFunction())
          if (Fn->getName() == Callee)
            return CI;
    return nullptr;
  };

  // Test for "#omp ordered [threads]"
  Builder.restoreIP(
      OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // EntryBB is only set if the body callback ran; it is dereferenced below.
  ASSERT_NE(EntryBB, nullptr);
  EXPECT_NE(EntryBB->getTerminator(), nullptr);

  CallInst *OrderedEntryCI = FindCallTo(*EntryBB, "__kmpc_ordered");
  // Fatal check: the call is dereferenced right below.
  ASSERT_NE(OrderedEntryCI, nullptr);
  EXPECT_EQ(OrderedEntryCI->getNumArgOperands(), 2U);
  EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
  EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));

  CallInst *OrderedEndCI = FindCallTo(*EntryBB, "__kmpc_end_ordered");
  ASSERT_NE(OrderedEndCI, nullptr);
  EXPECT_EQ(OrderedEndCI->getNumArgOperands(), 2U);
  EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
  EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
}
// Checks that "#omp ordered simd" emits neither a __kmpc_ordered nor a
// __kmpc_end_ordered call in the region's entry block.
TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  AllocaInst *PrivAI =
      Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");

  BasicBlock *EntryBB = nullptr;

  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &FiniBB) {
    EntryBB = FiniBB.getUniquePredecessor();

    llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
    llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
    EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
    EXPECT_EQ(EntryBB, CodeGenIPBB);

    Builder.restoreIP(CodeGenIP);
    Builder.CreateStore(F->arg_begin(), PrivAI);
    Value *PrivLoad =
        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
    Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
  };

  auto FiniCB = [&](InsertPointTy IP) {
    BasicBlock *IPBB = IP.getBlock();
    EXPECT_NE(IPBB->end(), IP.getPoint());
  };

  // Returns the first direct call to the function named Callee in Block, or
  // nullptr if there is none. Indirect calls are skipped rather than
  // dereferenced.
  auto FindCallTo = [](BasicBlock &Block, StringRef Callee) -> CallInst * {
    for (Instruction &I : Block)
      if (auto *CI = dyn_cast<CallInst>(&I))
        if (Function *Fn = CI->getCalledFunction())
          if (Fn->getName() == Callee)
            return CI;
    return nullptr;
  };

  // Test for "#omp ordered simd"
  Builder.restoreIP(
      OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));

  Builder.CreateRetVoid();
  OMPBuilder.finalize();
  EXPECT_FALSE(verifyModule(*M, &errs()));

  // EntryBB is only set if the body callback ran; it is dereferenced below.
  ASSERT_NE(EntryBB, nullptr);
  EXPECT_NE(EntryBB->getTerminator(), nullptr);

  // The simd form must not emit any ordered runtime calls.
  EXPECT_EQ(FindCallTo(*EntryBB, "__kmpc_ordered"), nullptr);
  EXPECT_EQ(FindCallTo(*EntryBB, "__kmpc_end_ordered"), nullptr);
}
TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();