forked from OSchip/llvm-project
[OPENMP] Fix for PR33922: New ident_t flags for
__kmpc_for_static_init(). OpenMP 5.0 will include the OpenMP Tools interface, which requires distinguishing between different worksharing constructs. Since the same entry point (__kmp_for_static_init(ident_t *loc, kmp_int32 global_tid, ...)) is called for the static loop, sections, and distribute constructs, it is suggested to use the 'flags' field of the ident_t structure to pass the type of the construct. llvm-svn: 310865
This commit is contained in:
parent
a13897245c
commit
0f87dbee4e
|
@ -19,6 +19,7 @@
|
|||
#include "clang/AST/Decl.h"
|
||||
#include "clang/AST/StmtOpenMP.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/BitmaskEnum.h"
|
||||
#include "llvm/Bitcode/BitcodeReader.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
|
@ -420,7 +421,7 @@ public:
|
|||
/// \brief Values for bit flags used in the ident_t to describe the fields.
|
||||
/// All enumeration elements are named and described in accordance with the code
|
||||
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
|
||||
enum OpenMPLocationFlags {
|
||||
enum OpenMPLocationFlags : unsigned {
|
||||
/// \brief Use trampoline for internal microtask.
|
||||
OMP_IDENT_IMD = 0x01,
|
||||
/// \brief Use c-style ident structure.
|
||||
|
@ -436,7 +437,14 @@ enum OpenMPLocationFlags {
|
|||
/// \brief Implicit barrier in 'sections' directive.
|
||||
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
|
||||
/// \brief Implicit barrier in 'single' directive.
|
||||
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
|
||||
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
|
||||
/// Call of __kmp_for_static_init for static loop.
|
||||
OMP_IDENT_WORK_LOOP = 0x200,
|
||||
/// Call of __kmp_for_static_init for sections.
|
||||
OMP_IDENT_WORK_SECTIONS = 0x400,
|
||||
/// Call of __kmp_for_static_init for distribute.
|
||||
OMP_IDENT_WORK_DISTRIBUTE = 0x800,
|
||||
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
|
||||
};
|
||||
|
||||
/// \brief Describes ident structure that describes a source location.
|
||||
|
@ -2956,79 +2964,85 @@ static void emitForStaticInitCall(
|
|||
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
|
||||
llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
|
||||
OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
|
||||
unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
|
||||
Address ST, llvm::Value *Chunk) {
|
||||
const CGOpenMPRuntime::StaticRTInput &Values) {
|
||||
if (!CGF.HaveInsertPoint())
|
||||
return;
|
||||
return;
|
||||
|
||||
assert(!Ordered);
|
||||
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
|
||||
Schedule == OMP_sch_static_balanced_chunked ||
|
||||
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
|
||||
Schedule == OMP_dist_sch_static ||
|
||||
Schedule == OMP_dist_sch_static_chunked);
|
||||
assert(!Values.Ordered);
|
||||
assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
|
||||
Schedule == OMP_sch_static_balanced_chunked ||
|
||||
Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
|
||||
Schedule == OMP_dist_sch_static ||
|
||||
Schedule == OMP_dist_sch_static_chunked);
|
||||
|
||||
// Call __kmpc_for_static_init(
|
||||
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
|
||||
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
|
||||
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
|
||||
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
|
||||
if (Chunk == nullptr) {
|
||||
assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
|
||||
Schedule == OMP_dist_sch_static) &&
|
||||
"expected static non-chunked schedule");
|
||||
// If the Chunk was not specified in the clause - use default value 1.
|
||||
Chunk = CGF.Builder.getIntN(IVSize, 1);
|
||||
} else {
|
||||
assert((Schedule == OMP_sch_static_chunked ||
|
||||
Schedule == OMP_sch_static_balanced_chunked ||
|
||||
Schedule == OMP_ord_static_chunked ||
|
||||
Schedule == OMP_dist_sch_static_chunked) &&
|
||||
"expected static chunked schedule");
|
||||
}
|
||||
llvm::Value *Args[] = {
|
||||
UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
|
||||
Schedule, M1, M2)), // Schedule type
|
||||
IL.getPointer(), // &isLastIter
|
||||
LB.getPointer(), // &LB
|
||||
UB.getPointer(), // &UB
|
||||
ST.getPointer(), // &Stride
|
||||
CGF.Builder.getIntN(IVSize, 1), // Incr
|
||||
Chunk // Chunk
|
||||
};
|
||||
CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
|
||||
// Call __kmpc_for_static_init(
|
||||
// ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
|
||||
// kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
|
||||
// kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
|
||||
// kmp_int[32|64] incr, kmp_int[32|64] chunk);
|
||||
llvm::Value *Chunk = Values.Chunk;
|
||||
if (Chunk == nullptr) {
|
||||
assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
|
||||
Schedule == OMP_dist_sch_static) &&
|
||||
"expected static non-chunked schedule");
|
||||
// If the Chunk was not specified in the clause - use default value 1.
|
||||
Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
|
||||
} else {
|
||||
assert((Schedule == OMP_sch_static_chunked ||
|
||||
Schedule == OMP_sch_static_balanced_chunked ||
|
||||
Schedule == OMP_ord_static_chunked ||
|
||||
Schedule == OMP_dist_sch_static_chunked) &&
|
||||
"expected static chunked schedule");
|
||||
}
|
||||
llvm::Value *Args[] = {
|
||||
UpdateLocation,
|
||||
ThreadId,
|
||||
CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
|
||||
M2)), // Schedule type
|
||||
Values.IL.getPointer(), // &isLastIter
|
||||
Values.LB.getPointer(), // &LB
|
||||
Values.UB.getPointer(), // &UB
|
||||
Values.ST.getPointer(), // &Stride
|
||||
CGF.Builder.getIntN(Values.IVSize, 1), // Incr
|
||||
Chunk // Chunk
|
||||
};
|
||||
CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
|
||||
SourceLocation Loc,
|
||||
OpenMPDirectiveKind DKind,
|
||||
const OpenMPScheduleTy &ScheduleKind,
|
||||
unsigned IVSize, bool IVSigned,
|
||||
bool Ordered, Address IL, Address LB,
|
||||
Address UB, Address ST,
|
||||
llvm::Value *Chunk) {
|
||||
OpenMPSchedType ScheduleNum =
|
||||
getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
|
||||
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
|
||||
const StaticRTInput &Values) {
|
||||
OpenMPSchedType ScheduleNum = getRuntimeSchedule(
|
||||
ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
|
||||
assert(isOpenMPWorksharingDirective(DKind) &&
|
||||
"Expected loop-based or sections-based directive.");
|
||||
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
|
||||
isOpenMPLoopDirective(DKind)
|
||||
? OMP_IDENT_WORK_LOOP
|
||||
: OMP_IDENT_WORK_SECTIONS);
|
||||
auto *ThreadId = getThreadID(CGF, Loc);
|
||||
auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
|
||||
auto *StaticInitFunction =
|
||||
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
|
||||
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
|
||||
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
|
||||
Ordered, IL, LB, UB, ST, Chunk);
|
||||
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitDistributeStaticInit(
|
||||
CodeGenFunction &CGF, SourceLocation Loc,
|
||||
OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
|
||||
bool Ordered, Address IL, Address LB, Address UB, Address ST,
|
||||
llvm::Value *Chunk) {
|
||||
OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
|
||||
auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
|
||||
OpenMPDistScheduleClauseKind SchedKind,
|
||||
const CGOpenMPRuntime::StaticRTInput &Values) {
|
||||
OpenMPSchedType ScheduleNum =
|
||||
getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
|
||||
auto *UpdatedLocation =
|
||||
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
|
||||
auto *ThreadId = getThreadID(CGF, Loc);
|
||||
auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
|
||||
auto *StaticInitFunction =
|
||||
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
|
||||
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
|
||||
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
|
||||
OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
|
||||
UB, ST, Chunk);
|
||||
OMPC_SCHEDULE_MODIFIER_unknown, Values);
|
||||
}
|
||||
|
||||
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
|
||||
|
|
|
@ -806,6 +806,35 @@ public:
|
|||
unsigned IVSize, bool IVSigned, bool Ordered,
|
||||
const DispatchRTInput &DispatchValues);
|
||||
|
||||
/// Struct with the values to be passed to the static runtime function
|
||||
struct StaticRTInput {
|
||||
/// Size of the iteration variable in bits.
|
||||
unsigned IVSize = 0;
|
||||
/// Sign of the iteration variable.
|
||||
bool IVSigned = false;
|
||||
/// true if loop is ordered, false otherwise.
|
||||
bool Ordered = false;
|
||||
/// Address of the output variable in which the flag of the last iteration
|
||||
/// is returned.
|
||||
Address IL = Address::invalid();
|
||||
/// Address of the output variable in which the lower iteration number is
|
||||
/// returned.
|
||||
Address LB = Address::invalid();
|
||||
/// Address of the output variable in which the upper iteration number is
|
||||
/// returned.
|
||||
Address UB = Address::invalid();
|
||||
/// Address of the output variable in which the stride value is returned
|
||||
/// necessary to generate the static_chunked scheduled loop.
|
||||
Address ST = Address::invalid();
|
||||
/// Value of the chunk for the static_chunked scheduled loop. For the
|
||||
/// default (nullptr) value, a chunk size of 1 will be used.
|
||||
llvm::Value *Chunk = nullptr;
|
||||
StaticRTInput(unsigned IVSize, bool IVSigned, bool Ordered, Address IL,
|
||||
Address LB, Address UB, Address ST,
|
||||
llvm::Value *Chunk = nullptr)
|
||||
: IVSize(IVSize), IVSigned(IVSigned), Ordered(Ordered), IL(IL), LB(LB),
|
||||
UB(UB), ST(ST), Chunk(Chunk) {}
|
||||
};
|
||||
/// \brief Call the appropriate runtime routine to initialize it before start
|
||||
/// of loop.
|
||||
///
|
||||
|
@ -813,55 +842,29 @@ public:
|
|||
/// specify an ordered clause on the loop construct.
|
||||
/// Depending on the loop schedule, it is necessary to call some runtime
|
||||
/// routine before start of the OpenMP loop to get the loop upper / lower
|
||||
/// bounds \a LB and \a UB and stride \a ST.
|
||||
/// bounds LB and UB and stride ST.
|
||||
///
|
||||
/// \param CGF Reference to current CodeGenFunction.
|
||||
/// \param Loc Clang source location.
|
||||
/// \param DKind Kind of the directive.
|
||||
/// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
|
||||
/// \param IVSize Size of the iteration variable in bits.
|
||||
/// \param IVSigned Sign of the iteration variable.
|
||||
/// \param Ordered true if loop is ordered, false otherwise.
|
||||
/// \param IL Address of the output variable in which the flag of the
|
||||
/// last iteration is returned.
|
||||
/// \param LB Address of the output variable in which the lower iteration
|
||||
/// number is returned.
|
||||
/// \param UB Address of the output variable in which the upper iteration
|
||||
/// number is returned.
|
||||
/// \param ST Address of the output variable in which the stride value is
|
||||
/// returned necessary to generated the static_chunked scheduled loop.
|
||||
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
|
||||
/// For the default (nullptr) value, the chunk 1 will be used.
|
||||
/// \param Values Input arguments for the construct.
|
||||
///
|
||||
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
|
||||
OpenMPDirectiveKind DKind,
|
||||
const OpenMPScheduleTy &ScheduleKind,
|
||||
unsigned IVSize, bool IVSigned, bool Ordered,
|
||||
Address IL, Address LB, Address UB, Address ST,
|
||||
llvm::Value *Chunk = nullptr);
|
||||
const StaticRTInput &Values);
|
||||
|
||||
///
|
||||
/// \param CGF Reference to current CodeGenFunction.
|
||||
/// \param Loc Clang source location.
|
||||
/// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause.
|
||||
/// \param IVSize Size of the iteration variable in bits.
|
||||
/// \param IVSigned Sign of the iteration variable.
|
||||
/// \param Ordered true if loop is ordered, false otherwise.
|
||||
/// \param IL Address of the output variable in which the flag of the
|
||||
/// last iteration is returned.
|
||||
/// \param LB Address of the output variable in which the lower iteration
|
||||
/// number is returned.
|
||||
/// \param UB Address of the output variable in which the upper iteration
|
||||
/// number is returned.
|
||||
/// \param ST Address of the output variable in which the stride value is
|
||||
/// returned necessary to generated the static_chunked scheduled loop.
|
||||
/// \param Chunk Value of the chunk for the static_chunked scheduled loop.
|
||||
/// For the default (nullptr) value, the chunk 1 will be used.
|
||||
/// \param Values Input arguments for the construct.
|
||||
///
|
||||
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc,
|
||||
virtual void emitDistributeStaticInit(CodeGenFunction &CGF,
|
||||
SourceLocation Loc,
|
||||
OpenMPDistScheduleClauseKind SchedKind,
|
||||
unsigned IVSize, bool IVSigned,
|
||||
bool Ordered, Address IL, Address LB,
|
||||
Address UB, Address ST,
|
||||
llvm::Value *Chunk = nullptr);
|
||||
const StaticRTInput &Values);
|
||||
|
||||
/// \brief Call the appropriate runtime routine to notify that we finished
|
||||
/// iteration of the ordered loop with the dynamic scheduling.
|
||||
|
|
|
@ -1771,9 +1771,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(
|
|||
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
|
||||
IVSigned, Ordered, DipatchRTInputValues);
|
||||
} else {
|
||||
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
|
||||
Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
|
||||
LoopArgs.ST, LoopArgs.Chunk);
|
||||
CGOpenMPRuntime::StaticRTInput StaticInit(
|
||||
IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
|
||||
LoopArgs.ST, LoopArgs.Chunk);
|
||||
RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
|
||||
ScheduleKind, StaticInit);
|
||||
}
|
||||
|
||||
auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
|
||||
|
@ -1815,10 +1817,10 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
|
|||
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
|
||||
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
|
||||
|
||||
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
|
||||
IVSigned, /* Ordered = */ false, LoopArgs.IL,
|
||||
LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
|
||||
LoopArgs.Chunk);
|
||||
CGOpenMPRuntime::StaticRTInput StaticInit(
|
||||
IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
|
||||
LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
|
||||
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
|
||||
|
||||
// for combined 'distribute' and 'for' the increment expression of distribute
|
||||
// is stored in DistInc. For 'distribute' alone, it is in Inc.
|
||||
|
@ -2227,10 +2229,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
|
|||
// chunks that are approximately equal in size, and at most one chunk is
|
||||
// distributed to each thread. Note that the size of the chunks is
|
||||
// unspecified in this case.
|
||||
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
|
||||
IVSize, IVSigned, Ordered,
|
||||
IL.getAddress(), LB.getAddress(),
|
||||
UB.getAddress(), ST.getAddress());
|
||||
CGOpenMPRuntime::StaticRTInput StaticInit(
|
||||
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
|
||||
UB.getAddress(), ST.getAddress());
|
||||
RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
|
||||
ScheduleKind, StaticInit);
|
||||
auto LoopExit =
|
||||
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
|
||||
// UB = min(UB, GlobalUB);
|
||||
|
@ -2462,10 +2465,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
|
|||
// Emit static non-chunked loop.
|
||||
OpenMPScheduleTy ScheduleKind;
|
||||
ScheduleKind.Schedule = OMPC_SCHEDULE_static;
|
||||
CGOpenMPRuntime::StaticRTInput StaticInit(
|
||||
/*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
|
||||
LB.getAddress(), UB.getAddress(), ST.getAddress());
|
||||
CGF.CGM.getOpenMPRuntime().emitForStaticInit(
|
||||
CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
|
||||
/*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
|
||||
UB.getAddress(), ST.getAddress());
|
||||
CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
|
||||
// UB = min(UB, GlobalUB);
|
||||
auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
|
||||
auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
|
||||
|
@ -3080,10 +3084,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
|
|||
// league. The size of the chunks is unspecified in this case.
|
||||
if (RT.isStaticNonchunked(ScheduleKind,
|
||||
/* Chunked */ Chunk != nullptr)) {
|
||||
CGOpenMPRuntime::StaticRTInput StaticInit(
|
||||
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
|
||||
LB.getAddress(), UB.getAddress(), ST.getAddress());
|
||||
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
|
||||
IVSize, IVSigned, /* Ordered = */ false,
|
||||
IL.getAddress(), LB.getAddress(),
|
||||
UB.getAddress(), ST.getAddress());
|
||||
StaticInit);
|
||||
auto LoopExit =
|
||||
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
|
||||
// UB = min(UB, GlobalUB);
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
|
||||
// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
// CHECK-DAG: [[DEF_LOC_DISTRIBUTE_0:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2050, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
|
||||
|
||||
// CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
|
||||
void without_schedule_clause(float *a, float *b, float *c, float *d) {
|
||||
|
@ -48,7 +49,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-DAG: store i32 0, i32* [[LAST]]
|
||||
// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
|
||||
// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
|
||||
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
|
||||
// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]]
|
||||
// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423
|
||||
// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
|
||||
|
@ -110,7 +111,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-DAG: store i32 0, i32* [[LAST]]
|
||||
// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
|
||||
// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
|
||||
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1)
|
||||
// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]]
|
||||
// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423
|
||||
// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
|
||||
|
@ -172,7 +173,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
|
|||
// CHECK-DAG: store i32 0, i32* [[LAST]]
|
||||
// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]]
|
||||
// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]]
|
||||
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5)
|
||||
// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* [[DEF_LOC_DISTRIBUTE_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5)
|
||||
// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]]
|
||||
// CHECK-DAG: [[USWITCH:%.+]] = icmp ugt i32 [[UBV0]], 16908288
|
||||
// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]]
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
|
||||
// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8*
|
||||
// CHECK-DAG: [[I:@.+]] = global i8 1,
|
||||
// CHECK-DAG: [[J:@.+]] = global i8 2,
|
||||
// CHECK-DAG: [[K:@.+]] = global i8 3,
|
||||
|
@ -19,7 +20,7 @@
|
|||
void without_schedule_clause(float *a, float *b, float *c, float *d) {
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
|
||||
#pragma omp for nowait
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// UB = min(UB, GlobalUB)
|
||||
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
|
||||
|
@ -60,7 +61,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
|
|||
void static_not_chunked(float *a, float *b, float *c, float *d) {
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
|
||||
#pragma omp for schedule(static)
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// UB = min(UB, GlobalUB)
|
||||
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
|
||||
|
@ -101,7 +102,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
|
|||
void static_chunked(float *a, float *b, float *c, float *d) {
|
||||
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
|
||||
#pragma omp for schedule(monotonic: static, 5)
|
||||
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 536870945, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
|
||||
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 536870945, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
|
||||
// UB = min(UB, GlobalUB)
|
||||
// CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#define HEADER
|
||||
|
||||
// CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
|
||||
// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 514, i32 0, i32 0, i8*
|
||||
|
||||
// CHECK-LABEL: with_var_schedule
|
||||
void with_var_schedule() {
|
||||
|
@ -19,8 +20,8 @@ void with_var_schedule() {
|
|||
// CHECK: [[CHUNK:%.+]] = load i8*, i8** %
|
||||
// CHECK: [[CHUNK_VAL:%.+]] = load i8, i8* [[CHUNK]],
|
||||
// CHECK: [[CHUNK_SIZE:%.+]] = sext i8 [[CHUNK_VAL]] to i64
|
||||
// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
|
||||
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
|
||||
// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]])
|
||||
// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID]])
|
||||
#pragma omp parallel for schedule(static, char(a))
|
||||
for (unsigned long long i = 1; i < 2; ++i) {
|
||||
}
|
||||
|
@ -34,7 +35,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
|
|||
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
|
||||
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// UB = min(UB, GlobalUB)
|
||||
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
|
||||
|
@ -77,7 +78,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
|
|||
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
|
||||
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
|
||||
// UB = min(UB, GlobalUB)
|
||||
// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp sgt i32 [[UB]], 4571423
|
||||
|
@ -120,7 +121,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
|
|||
// CHECK: store i32* [[GTID_PARAM_ADDR]], i32** [[GTID_REF_ADDR:%.+]],
|
||||
// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
|
||||
// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
|
||||
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
|
||||
// CHECK: call void @__kmpc_for_static_init_4u([[IDENT_T_TY]]* [[LOOP_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 5)
|
||||
// UB = min(UB, GlobalUB)
|
||||
// CHECK: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
|
||||
// CHECK-NEXT: [[UBCMP:%.+]] = icmp ugt i32 [[UB]], 16908288
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
// expected-no-diagnostics
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
// CHECK: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
|
||||
// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
|
||||
// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8*
|
||||
// CHECK-LABEL: foo
|
||||
void foo() {};
|
||||
// CHECK-LABEL: bar
|
||||
|
@ -29,7 +30,7 @@ int main() {
|
|||
{
|
||||
// CHECK: store i32 0, i32* [[LB_PTR:%.+]],
|
||||
// CHECK: store i32 1, i32* [[UB_PTR:%.+]],
|
||||
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
|
||||
// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* [[SECTIONS_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
|
||||
// <<UB = min(UB, GlobalUB);>>
|
||||
// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]]
|
||||
// CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1
|
||||
|
|
Loading…
Reference in New Issue