[OPENMP] Fix for PR33922: New ident_t flags for

__kmpc_for_static_init().

OpenMP 5.0 will include OpenMP Tools interface that requires distinguishing different worksharing constructs.

Since the same entry point (__kmp_for_static_init(ident_t *loc,
kmp_int32 global_tid,........)) is called in case static
loop/sections/distribute it is suggested using 'flags' field of the
ident_t structure to pass the type of the construct.

llvm-svn: 310865
This commit is contained in:
Alexey Bataev 2017-08-14 17:56:13 +00:00
parent a13897245c
commit 0f87dbee4e
7 changed files with 149 additions and 123 deletions

View File

@ -19,6 +19,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
@ -420,7 +421,7 @@ public:
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
enum OpenMPLocationFlags {
enum OpenMPLocationFlags : unsigned {
/// \brief Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
/// \brief Use c-style ident structure.
@ -436,7 +437,14 @@ enum OpenMPLocationFlags {
/// \brief Implicit barrier in 'sections' directive.
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
/// \brief Implicit barrier in 'single' directive.
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
/// Call of __kmp_for_static_init for static loop.
OMP_IDENT_WORK_LOOP = 0x200,
/// Call of __kmp_for_static_init for sections.
OMP_IDENT_WORK_SECTIONS = 0x400,
/// Call of __kmp_for_static_init for distribute.
OMP_IDENT_WORK_DISTRIBUTE = 0x800,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
/// \brief Describes ident structure that describes a source location.
@ -2956,79 +2964,85 @@ static void emitForStaticInitCall(
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
Address ST, llvm::Value *Chunk) {
const CGOpenMPRuntime::StaticRTInput &Values) {
if (!CGF.HaveInsertPoint())
return;
return;
assert(!Ordered);
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
Schedule == OMP_sch_static_balanced_chunked ||
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
Schedule == OMP_dist_sch_static ||
Schedule == OMP_dist_sch_static_chunked);
assert(!Values.Ordered);
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
Schedule == OMP_sch_static_balanced_chunked ||
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
Schedule == OMP_dist_sch_static ||
Schedule == OMP_dist_sch_static_chunked);
// Call __kmpc_for_static_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
if (Chunk == nullptr) {
assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule");
// If the Chunk was not specified in the clause - use default value 1.
Chunk = CGF.Builder.getIntN(IVSize, 1);
} else {
assert((Schedule == OMP_sch_static_chunked ||
Schedule == OMP_sch_static_balanced_chunked ||
Schedule == OMP_ord_static_chunked ||
Schedule == OMP_dist_sch_static_chunked) &&
"expected static chunked schedule");
}
llvm::Value *Args[] = {
UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
Schedule, M1, M2)), // Schedule type
IL.getPointer(), // &isLastIter
LB.getPointer(), // &LB
UB.getPointer(), // &UB
ST.getPointer(), // &Stride
CGF.Builder.getIntN(IVSize, 1), // Incr
Chunk // Chunk
};
CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
// Call __kmpc_for_static_init(
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
llvm::Value *Chunk = Values.Chunk;
if (Chunk == nullptr) {
assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule");
// If the Chunk was not specified in the clause - use default value 1.
Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
} else {
assert((Schedule == OMP_sch_static_chunked ||
Schedule == OMP_sch_static_balanced_chunked ||
Schedule == OMP_ord_static_chunked ||
Schedule == OMP_dist_sch_static_chunked) &&
"expected static chunked schedule");
}
llvm::Value *Args[] = {
UpdateLocation,
ThreadId,
CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
M2)), // Schedule type
Values.IL.getPointer(), // &isLastIter
Values.LB.getPointer(), // &LB
Values.UB.getPointer(), // &UB
Values.ST.getPointer(), // &Stride
CGF.Builder.getIntN(Values.IVSize, 1), // Incr
Chunk // Chunk
};
CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
SourceLocation Loc,
OpenMPDirectiveKind DKind,
const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned,
bool Ordered, Address IL, Address LB,
Address UB, Address ST,
llvm::Value *Chunk) {
OpenMPSchedType ScheduleNum =
getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
const StaticRTInput &Values) {
OpenMPSchedType ScheduleNum = getRuntimeSchedule(
ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
assert(isOpenMPWorksharingDirective(DKind) &&
"Expected loop-based or sections-based directive.");
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
isOpenMPLoopDirective(DKind)
? OMP_IDENT_WORK_LOOP
: OMP_IDENT_WORK_SECTIONS);
auto *ThreadId = getThreadID(CGF, Loc);
auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
auto *StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
Ordered, IL, LB, UB, ST, Chunk);
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
void CGOpenMPRuntime::emitDistributeStaticInit(
CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
bool Ordered, Address IL, Address LB, Address UB, Address ST,
llvm::Value *Chunk) {
OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
OpenMPDistScheduleClauseKind SchedKind,
const CGOpenMPRuntime::StaticRTInput &Values) {
OpenMPSchedType ScheduleNum =
getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
auto *UpdatedLocation =
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
auto *ThreadId = getThreadID(CGF, Loc);
auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
auto *StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
UB, ST, Chunk);
OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,

View File

@ -806,6 +806,35 @@ public:
unsigned IVSize, bool IVSigned, bool Ordered,
const DispatchRTInput &DispatchValues);
/// Struct with the values to be passed to the static runtime function
struct StaticRTInput {
/// Size of the iteration variable in bits.
unsigned IVSize = 0;
/// Sign of the iteration variable.
bool IVSigned = false;
/// true if loop is ordered, false otherwise.
bool Ordered = false;
/// Address of the output variable in which the flag of the last iteration
/// is returned.
Address IL = Address::invalid();
/// Address of the output variable in which the lower iteration number is
/// returned.
Address LB = Address::invalid();
/// Address of the output variable in which the upper iteration number is
/// returned.
Address UB = Address::invalid();
/// Address of the output variable in which the stride value is returned
/// necessary to generated the static_chunked scheduled loop.
Address ST = Address::invalid();
/// Value of the chunk for the static_chunked scheduled loop. For the
/// default (nullptr) value, the chunk 1 will be used.
llvm::Value *Chunk = nullptr;
StaticRTInput(unsigned IVSize, bool IVSigned, bool Ordered, Address IL,
Address LB, Address UB, Address ST,
llvm::Value *Chunk = nullptr)
: IVSize(IVSize), IVSigned(IVSigned), Ordered(Ordered), IL(IL), LB(LB),
UB(UB), ST(ST), Chunk(Chunk) {}
};
/// \brief Call the appropriate runtime routine to initialize it before start
/// of loop.
///
@ -813,55 +842,29 @@ public:
/// specify a ordered clause on the loop construct.
/// Depending on the loop schedule, it is necessary to call some runtime
/// routine before start of the OpenMP loop to get the loop upper / lower
/// bounds \a LB and \a UB and stride \a ST.
/// bounds LB and UB and stride ST.
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param DKind Kind of the directive.
/// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the iteration variable.
/// \param Ordered true if loop is ordered, false otherwise.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
/// \param LB Address of the output variable in which the lower iteration
/// number is returned.
/// \param UB Address of the output variable in which the upper iteration
/// number is returned.
/// \param ST Address of the output variable in which the stride value is
/// returned necessary to generated the static_chunked scheduled loop.
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
/// For the default (nullptr) value, the chunk 1 will be used.
/// \param Values Input arguments for the construct.
///
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind DKind,
const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned, bool Ordered,
Address IL, Address LB, Address UB, Address ST,
llvm::Value *Chunk = nullptr);
const StaticRTInput &Values);
///
/// \param CGF Reference to current CodeGenFunction.
/// \param Loc Clang source location.
/// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
/// \param IVSize Size of the iteration variable in bits.
/// \param IVSigned Sign of the iteration variable.
/// \param Ordered true if loop is ordered, false otherwise.
/// \param IL Address of the output variable in which the flag of the
/// last iteration is returned.
/// \param LB Address of the output variable in which the lower iteration
/// number is returned.
/// \param UB Address of the output variable in which the upper iteration
/// number is returned.
/// \param ST Address of the output variable in which the stride value is
/// returned necessary to generated the static_chunked scheduled loop.
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
/// For the default (nullptr) value, the chunk 1 will be used.
/// \param Values Input arguments for the construct.
///
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
virtual void emitDistributeStaticInit(CodeGenFunction &CGF,
SourceLocation Loc,
OpenMPDistScheduleClauseKind SchedKind,
unsigned IVSize, bool IVSigned,
bool Ordered, Address IL, Address LB,
Address UB, Address ST,
llvm::Value *Chunk = nullptr);
const StaticRTInput &Values);
/// \brief Call the appropriate runtime routine to notify that we finished
/// iteration of the ordered loop with the dynamic scheduling.

View File

@ -1771,9 +1771,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
IVSigned, Ordered, DipatchRTInputValues);
} else {
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
LoopArgs.ST, LoopArgs.Chunk);
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
LoopArgs.ST, LoopArgs.Chunk);
RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
}
auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
@ -1815,10 +1817,10 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
IVSigned, /* Ordered = */ false, LoopArgs.IL,
LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
LoopArgs.Chunk);
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
// for combined 'distribute' and 'for' the increment expression of distribute
// is store in DistInc. For 'distribute' alone, it is in Inc.
@ -2227,10 +2229,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// chunks that are approximately equal in size, and at most one chunk is
// distributed to each thread. Note that the size of the chunks is
// unspecified in this case.
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
IVSize, IVSigned, Ordered,
IL.getAddress(), LB.getAddress(),
UB.getAddress(), ST.getAddress());
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
UB.getAddress(), ST.getAddress());
RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
auto LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
@ -2462,10 +2465,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// Emit static non-chunked loop.
OpenMPScheduleTy ScheduleKind;
ScheduleKind.Schedule = OMPC_SCHEDULE_static;
CGOpenMPRuntime::StaticRTInput StaticInit(
/*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
LB.getAddress(), UB.getAddress(), ST.getAddress());
CGF.CGM.getOpenMPRuntime().emitForStaticInit(
CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
/*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
UB.getAddress(), ST.getAddress());
CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
// UB = min(UB, GlobalUB);
auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
@ -3080,10 +3084,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// league. The size of the chunks is unspecified in this case.
if (RT.isStaticNonchunked(ScheduleKind,
/* Chunked */ Chunk != nullptr)) {
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
LB.getAddress(), UB.getAddress(), ST.getAddress());
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
IVSize, IVSigned, /* Ordered = */ false,
IL.getAddress(), LB.getAddress(),
UB.getAddress(), ST.getAddress());
StaticInit);
auto LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);

View File

@ -23,6 +23,7 @@
// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void without_schedule_clause(float *a, float *b, float *c, float *d) {
@ -48,7 +49,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
// CHECK-DAG: store i32 0, i32* [[LAST]]
// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]]
// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423
// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
@ -110,7 +111,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK-DAG: store i32 0, i32* [[LAST]]
// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]]
// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423
// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
@ -172,7 +173,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
// CHECK-DAG: store i32 0, i32* [[LAST]]
// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5)
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5)
// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]]
// CHECK-DAG: [[USWITCH:%.+]] = icmp ugt i32 [[UBV0]], 16908288
// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]

View File

@ -11,6 +11,7 @@
// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8*
// CHECK-DAG: [[I:@.+]] = global i8 1,
// CHECK-DAG: [[J:@.+]] = global i8 2,
// CHECK-DAG: [[K:@.+]] = global i8 3,
@ -19,7 +20,7 @@
void without_schedule_clause(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for nowait
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
@ -60,7 +61,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for schedule(static)
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
@ -101,7 +102,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
void static_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
#pragma omp for schedule(monotonic: static, 5)
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 536870945, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 536870945, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
// UB = min(UB, GlobalUB)
// CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288

View File

@ -8,6 +8,7 @@
#define HEADER
// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 514, i32 0, i32 0, i8*
// CHECK-LABEL: with_var_schedule
void with_var_schedule() {
@ -19,8 +20,8 @@ void with_var_schedule() {
// CHECK: [[CHUNK:%.+]] = load i8*, i8** %
// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* [[CHUNK]],
// CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64
// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID]])
#pragma omp parallel for schedule(static, char(a))
for (unsigned long long i = 1; i < 2; ++i) {
}
@ -34,7 +35,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
@ -77,7 +78,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
// UB = min(UB, GlobalUB)
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
@ -120,7 +121,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
// UB = min(UB, GlobalUB)
// CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288

View File

@ -4,7 +4,8 @@
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
// CHECK: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8*
// CHECK-LABEL: foo
void foo() {};
// CHECK-LABEL: bar
@ -29,7 +30,7 @@ int main() {
{
// CHECK: store i32 0, i32* [[LB_PTR:%.+]],
// CHECK: store i32 1, i32* [[UB_PTR:%.+]],
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* [[SECTIONS_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
// <<UB = min(UB, GlobalUB);>>
// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]]
// CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1