Reapply "[OpenMP] Refactor OMPScheduleType enum."
This reverts commit af0285122f.

The test "libomp::loop_dispatch.c" on builder
openmp-gcc-x86_64-linux-debian fails from time to time.
See #54969. This patch is unrelated.
parent 25445b94db
commit 2d92ee97f1
@@ -3760,9 +3760,11 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
         CGM.getOpenMPRuntime().getOMPBuilder();
     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
-    OMPBuilder.applyWorkshareLoop(Builder.getCurrentDebugLocation(), CLI,
-                                  AllocaIP, NeedsBarrier, SchedKind,
-                                  ChunkSize);
+    OMPBuilder.applyWorkshareLoop(
+        Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
+        SchedKind, ChunkSize, /*HasSimdModifier=*/false,
+        /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
+        /*HasOrderedClause=*/false);
     return;
   }

@@ -38,7 +38,7 @@
 // CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4
 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 38, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1)
+// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741862, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1)
 // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]:

@@ -38,7 +38,7 @@
 // CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4
 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 35, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1)
+// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741859, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1)
 // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]:

@@ -38,7 +38,7 @@
 // CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4
 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 35, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 5)
+// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741859, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 5)
 // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]:

@@ -38,7 +38,7 @@
 // CHECK-NEXT: store i32 %[[DOTCOUNT]], i32* %[[P_UPPERBOUND]], align 4
 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 37, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1)
+// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741861, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1)
 // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]]
 // CHECK-EMPTY:
 // CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]:

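Editor's note (not part of the diff): the schedule constants in these test updates change because the lowering now emits the full refactored schedule type, which sets the nonmonotonic modifier bit by default for non-static, non-ordered schedules (OpenMP 5.1 semantics). The arithmetic behind the new literals, as a quick C++ sanity check:

// Editor's sketch: new literal == old literal plus ModifierNonmonotonic (1 << 30).
constexpr unsigned ModifierNonmonotonic = 1u << 30;
static_assert((35 | ModifierNonmonotonic) == 1073741859u, "dynamic, chunked");
static_assert((37 | ModifierNonmonotonic) == 1073741861u, "runtime");
static_assert((38 | ModifierNonmonotonic) == 1073741862u, "auto");
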
@@ -74,34 +74,114 @@ enum class IdentFlag {

 /// \note This needs to be kept in sync with kmp.h enum sched_type.
 /// Todo: Update kmp.h to include this file, and remove the enums in kmp.h
 /// To complete this, more enum values will need to be moved here.
 enum class OMPScheduleType {
-  StaticChunked = 33,
-  Static = 34,   // static unspecialized
-  DynamicChunked = 35,
-  GuidedChunked = 36, // guided unspecialized
-  Runtime = 37,
-  Auto = 38,     // auto
+  // For typed comparisons, not a valid schedule
+  None = 0,

-  StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd)
-  GuidedSimd = 46,            // guided with chunk adjustment
-  RuntimeSimd = 47,           // runtime with chunk adjustment
+  // Schedule algorithms
+  BaseStaticChunked = 1,
+  BaseStatic = 2,
+  BaseDynamicChunked = 3,
+  BaseGuidedChunked = 4,
+  BaseRuntime = 5,
+  BaseAuto = 6,
+  BaseTrapezoidal = 7,
+  BaseGreedy = 8,
+  BaseBalanced = 9,
+  BaseGuidedIterativeChunked = 10,
+  BaseGuidedAnalyticalChunked = 11,
+  BaseSteal = 12,

-  OrderedStaticChunked = 65,
-  OrderedStatic = 66, // ordered static unspecialized
-  OrderedDynamicChunked = 67,
-  OrderedGuidedChunked = 68,
-  OrderedRuntime = 69,
-  OrderedAuto = 70, // ordered auto
+  // with chunk adjustment (e.g., simd)
+  BaseStaticBalancedChunked = 13,
+  BaseGuidedSimd = 14,
+  BaseRuntimeSimd = 15,

-  DistributeChunked = 91, // distribute static chunked
-  Distribute = 92,        // distribute static unspecialized
+  // static schedules algorithims for distribute
+  BaseDistributeChunked = 27,
+  BaseDistribute = 28,

+  // Modifier flags to be combined with schedule algorithms
+  ModifierUnordered = (1 << 5),
+  ModifierOrdered = (1 << 6),
+  ModifierNomerge = (1 << 7),
+  ModifierMonotonic = (1 << 29),
+  ModifierNonmonotonic = (1 << 30),
+
+  // Masks combining multiple flags
+  OrderingMask = ModifierUnordered | ModifierOrdered | ModifierNomerge,
+  MonotonicityMask = ModifierMonotonic | ModifierNonmonotonic,
+  ModifierMask = OrderingMask | MonotonicityMask,
+
+  // valid schedule type values, without monotonicity flags
+  UnorderedStaticChunked = BaseStaticChunked | ModifierUnordered,   // 33
+  UnorderedStatic = BaseStatic | ModifierUnordered,                 // 34
+  UnorderedDynamicChunked = BaseDynamicChunked | ModifierUnordered, // 35
+  UnorderedGuidedChunked = BaseGuidedChunked | ModifierUnordered,   // 36
+  UnorderedRuntime = BaseRuntime | ModifierUnordered,               // 37
+  UnorderedAuto = BaseAuto | ModifierUnordered,                     // 38
+  UnorderedTrapezoidal = BaseTrapezoidal | ModifierUnordered,       // 39
+  UnorderedGreedy = BaseGreedy | ModifierUnordered,                 // 40
+  UnorderedBalanced = BaseBalanced | ModifierUnordered,             // 41
+  UnorderedGuidedIterativeChunked =
+      BaseGuidedIterativeChunked | ModifierUnordered,               // 42
+  UnorderedGuidedAnalyticalChunked =
+      BaseGuidedAnalyticalChunked | ModifierUnordered,              // 43
+  UnorderedSteal = BaseSteal | ModifierUnordered,                   // 44
+
+  UnorderedStaticBalancedChunked =
+      BaseStaticBalancedChunked | ModifierUnordered,                // 45
+  UnorderedGuidedSimd = BaseGuidedSimd | ModifierUnordered,         // 46
+  UnorderedRuntimeSimd = BaseRuntimeSimd | ModifierUnordered,       // 47
+
+  OrderedStaticChunked = BaseStaticChunked | ModifierOrdered,       // 65
+  OrderedStatic = BaseStatic | ModifierOrdered,                     // 66
+  OrderedDynamicChunked = BaseDynamicChunked | ModifierOrdered,     // 67
+  OrderedGuidedChunked = BaseGuidedChunked | ModifierOrdered,       // 68
+  OrderedRuntime = BaseRuntime | ModifierOrdered,                   // 69
+  OrderedAuto = BaseAuto | ModifierOrdered,                         // 70
+  OrderdTrapezoidal = BaseTrapezoidal | ModifierOrdered,            // 71
+
+  OrderedDistributeChunked = BaseDistributeChunked | ModifierOrdered, // 91
+  OrderedDistribute = BaseDistribute | ModifierOrdered,               // 92
+
+  NomergeUnorderedStaticChunked =
+      BaseStaticChunked | ModifierUnordered | ModifierNomerge,           // 161
+  NomergeUnorderedStatic =
+      BaseStatic | ModifierUnordered | ModifierNomerge,                  // 162
+  NomergeUnorderedDynamicChunked =
+      BaseDynamicChunked | ModifierUnordered | ModifierNomerge,          // 163
+  NomergeUnorderedGuidedChunked =
+      BaseGuidedChunked | ModifierUnordered | ModifierNomerge,           // 164
+  NomergeUnorderedRuntime =
+      BaseRuntime | ModifierUnordered | ModifierNomerge,                 // 165
+  NomergeUnorderedAuto = BaseAuto | ModifierUnordered | ModifierNomerge, // 166
+  NomergeUnorderedTrapezoidal =
+      BaseTrapezoidal | ModifierUnordered | ModifierNomerge,             // 167
+  NomergeUnorderedGreedy =
+      BaseGreedy | ModifierUnordered | ModifierNomerge,                  // 168
+  NomergeUnorderedBalanced =
+      BaseBalanced | ModifierUnordered | ModifierNomerge,                // 169
+  NomergeUnorderedGuidedIterativeChunked =
+      BaseGuidedIterativeChunked | ModifierUnordered | ModifierNomerge,  // 170
+  NomergeUnorderedGuidedAnalyticalChunked =
+      BaseGuidedAnalyticalChunked | ModifierUnordered | ModifierNomerge, // 171
+  NomergeUnorderedSteal =
+      BaseSteal | ModifierUnordered | ModifierNomerge,                   // 172
+
+  NomergeOrderedStaticChunked =
+      BaseStaticChunked | ModifierOrdered | ModifierNomerge,             // 193
+  NomergeOrderedStatic = BaseStatic | ModifierOrdered | ModifierNomerge, // 194
+  NomergeOrderedDynamicChunked =
+      BaseDynamicChunked | ModifierOrdered | ModifierNomerge,            // 195
+  NomergeOrderedGuidedChunked =
+      BaseGuidedChunked | ModifierOrdered | ModifierNomerge,             // 196
+  NomergeOrderedRuntime =
+      BaseRuntime | ModifierOrdered | ModifierNomerge,                   // 197
+  NomergeOrderedAuto = BaseAuto | ModifierOrdered | ModifierNomerge,     // 198
+  NomergeOrderedTrapezoidal =
+      BaseTrapezoidal | ModifierOrdered | ModifierNomerge,               // 199

-  ModifierMonotonic =
-      (1 << 29), // Set if the monotonic schedule modifier was present
-  ModifierNonmonotonic =
-      (1 << 30), // Set if the nonmonotonic schedule modifier was present
-  ModifierMask = ModifierMonotonic | ModifierNonmonotonic,
   LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask)
 };

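To illustrate how the refactored enum is meant to be used, here is a minimal sketch by the editor (not code from the patch), assuming the definitions above from OMPConstants.h and the bitmask operators enabled by LLVM_MARK_AS_BITMASK_ENUM:

#include "llvm/Frontend/OpenMP/OMPConstants.h"

using llvm::omp::OMPScheduleType;

// Compose a full schedule-type value: base algorithm plus ordering and
// monotonicity modifiers. BaseDynamicChunked | ModifierUnordered is the old
// DynamicChunked (35); adding ModifierNonmonotonic yields 1073741859.
OMPScheduleType exampleDynamicNonmonotonic() {
  return OMPScheduleType::BaseDynamicChunked |
         OMPScheduleType::ModifierUnordered |
         OMPScheduleType::ModifierNonmonotonic;
}

// Recover the base algorithm by masking out all modifier bits, the same
// pattern the new OpenMPIRBuilder code uses when switching on the schedule.
OMPScheduleType baseAlgorithm(OMPScheduleType Sched) {
  return Sched & ~OMPScheduleType::ModifierMask;
}
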
@@ -344,6 +344,7 @@ public:
                                    ArrayRef<CanonicalLoopInfo *> Loops,
                                    InsertPointTy ComputeIP);

+private:
   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
   ///
   /// This takes a \p LoopInfo representing a canonical loop, such as the one

@@ -403,17 +404,15 @@ public:
   /// the loop.
   /// \param Chunk    The size of loop chunk considered as a unit when
   ///                 scheduling. If \p nullptr, defaults to 1.
-  /// \param Ordered  Indicates whether the ordered clause is specified without
-  ///                 parameter.
   ///
   /// \returns Point where to insert code after the workshare construct.
   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           omp::OMPScheduleType SchedType,
                                           bool NeedsBarrier,
-                                          Value *Chunk = nullptr,
-                                          bool Ordered = false);
+                                          Value *Chunk = nullptr);

+public:
   /// Modifies the canonical loop to be a workshare loop.
   ///
   /// This takes a \p LoopInfo representing a canonical loop, such as the one

@@ -436,13 +435,23 @@ public:
   /// the loop.
   /// \param SchedKind Scheduling algorithm to use.
   /// \param ChunkSize The chunk size for the inner loop.
+  /// \param HasSimdModifier Whether the simd modifier is present in the
+  ///                        schedule clause.
+  /// \param HasMonotonicModifier Whether the monotonic modifier is present in
+  ///                             the schedule clause.
+  /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
+  ///                                present in the schedule clause.
+  /// \param HasOrderedClause Whether the (parameterless) ordered clause is
+  ///                         present.
   ///
   /// \returns Point where to insert code after the workshare construct.
   InsertPointTy applyWorkshareLoop(
       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
       bool NeedsBarrier,
       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
-      Value *ChunkSize = nullptr);
+      Value *ChunkSize = nullptr, bool HasSimdModifier = false,
+      bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
+      bool HasOrderedClause = false);

   /// Tile a loop nest.
   ///

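As a usage illustration of the extended entry point (an editor's sketch, not part of the patch; the builder, loop info, and insertion points are taken as parameters purely for the example), a front end lowering `schedule(monotonic: dynamic, 4)` would call it roughly like this:

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

static llvm::OpenMPIRBuilder::InsertPointTy
emitDynamicMonotonicLoop(llvm::OpenMPIRBuilder &OMPBuilder,
                         llvm::IRBuilderBase &Builder, llvm::DebugLoc DL,
                         llvm::CanonicalLoopInfo *CLI,
                         llvm::OpenMPIRBuilder::InsertPointTy AllocaIP) {
  llvm::Value *ChunkSize = Builder.getInt32(4); // chunk size from the clause
  return OMPBuilder.applyWorkshareLoop(
      DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
      llvm::omp::OMP_SCHEDULE_Dynamic, ChunkSize,
      /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/true,
      /*HasNonmonotonicModifier=*/false, /*HasOrderedClause=*/false);
}

With these arguments the effective schedule would be UnorderedDynamicChunked | ModifierMonotonic, and the loop would be emitted through the __kmpc_dispatch_init/next path.
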
@@ -69,8 +69,168 @@ static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
     return false;
   return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
 }

+#ifndef NDEBUG
+static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
+  // Valid ordered/unordered and base algorithm combinations.
+  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
+  case OMPScheduleType::UnorderedStaticChunked:
+  case OMPScheduleType::UnorderedStatic:
+  case OMPScheduleType::UnorderedDynamicChunked:
+  case OMPScheduleType::UnorderedGuidedChunked:
+  case OMPScheduleType::UnorderedRuntime:
+  case OMPScheduleType::UnorderedAuto:
+  case OMPScheduleType::UnorderedTrapezoidal:
+  case OMPScheduleType::UnorderedGreedy:
+  case OMPScheduleType::UnorderedBalanced:
+  case OMPScheduleType::UnorderedGuidedIterativeChunked:
+  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
+  case OMPScheduleType::UnorderedSteal:
+  case OMPScheduleType::UnorderedStaticBalancedChunked:
+  case OMPScheduleType::UnorderedGuidedSimd:
+  case OMPScheduleType::UnorderedRuntimeSimd:
+  case OMPScheduleType::OrderedStaticChunked:
+  case OMPScheduleType::OrderedStatic:
+  case OMPScheduleType::OrderedDynamicChunked:
+  case OMPScheduleType::OrderedGuidedChunked:
+  case OMPScheduleType::OrderedRuntime:
+  case OMPScheduleType::OrderedAuto:
+  case OMPScheduleType::OrderdTrapezoidal:
+  case OMPScheduleType::NomergeUnorderedStaticChunked:
+  case OMPScheduleType::NomergeUnorderedStatic:
+  case OMPScheduleType::NomergeUnorderedDynamicChunked:
+  case OMPScheduleType::NomergeUnorderedGuidedChunked:
+  case OMPScheduleType::NomergeUnorderedRuntime:
+  case OMPScheduleType::NomergeUnorderedAuto:
+  case OMPScheduleType::NomergeUnorderedTrapezoidal:
+  case OMPScheduleType::NomergeUnorderedGreedy:
+  case OMPScheduleType::NomergeUnorderedBalanced:
+  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
+  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
+  case OMPScheduleType::NomergeUnorderedSteal:
+  case OMPScheduleType::NomergeOrderedStaticChunked:
+  case OMPScheduleType::NomergeOrderedStatic:
+  case OMPScheduleType::NomergeOrderedDynamicChunked:
+  case OMPScheduleType::NomergeOrderedGuidedChunked:
+  case OMPScheduleType::NomergeOrderedRuntime:
+  case OMPScheduleType::NomergeOrderedAuto:
+  case OMPScheduleType::NomergeOrderedTrapezoidal:
+    break;
+  default:
+    return false;
+  }
+
+  // Must not set both monotonicity modifiers at the same time.
+  OMPScheduleType MonotonicityFlags =
+      SchedType & OMPScheduleType::MonotonicityMask;
+  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
+    return false;
+
+  return true;
+}
+#endif
+
+/// Determine which scheduling algorithm to use, determined from schedule clause
+/// arguments.
+static OMPScheduleType
+getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
+                          bool HasSimdModifier) {
+  // Currently, the default schedule is static.
+  switch (ClauseKind) {
+  case OMP_SCHEDULE_Default:
+  case OMP_SCHEDULE_Static:
+    return HasChunks ? OMPScheduleType::BaseStaticChunked
+                     : OMPScheduleType::BaseStatic;
+  case OMP_SCHEDULE_Dynamic:
+    return OMPScheduleType::BaseDynamicChunked;
+  case OMP_SCHEDULE_Guided:
+    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
+                           : OMPScheduleType::BaseGuidedChunked;
+  case OMP_SCHEDULE_Auto:
+    return llvm::omp::OMPScheduleType::BaseAuto;
+  case OMP_SCHEDULE_Runtime:
+    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
+                           : OMPScheduleType::BaseRuntime;
+  }
+  llvm_unreachable("unhandled schedule clause argument");
+}
+
+/// Adds ordering modifier flags to schedule type.
+static OMPScheduleType
+getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType,
+                              bool HasOrderedClause) {
+  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
+             OMPScheduleType::None &&
+         "Must not have ordering nor monotonicity flags already set");
+
+  OMPScheduleType OrderingModifier = HasOrderedClause
+                                         ? OMPScheduleType::ModifierOrdered
+                                         : OMPScheduleType::ModifierUnordered;
+  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
+
+  // Unsupported combinations
+  if (OrderingScheduleType ==
+      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
+    return OMPScheduleType::OrderedGuidedChunked;
+  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
+                                    OMPScheduleType::ModifierOrdered))
+    return OMPScheduleType::OrderedRuntime;
+
+  return OrderingScheduleType;
+}
+
+/// Adds monotonicity modifier flags to schedule type.
+static OMPScheduleType
+getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType,
+                                  bool HasSimdModifier, bool HasMonotonic,
+                                  bool HasNonmonotonic, bool HasOrderedClause) {
+  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
+             OMPScheduleType::None &&
+         "Must not have monotonicity flags already set");
+  assert((!HasMonotonic || !HasNonmonotonic) &&
+         "Monotonic and Nonmonotonic are contradicting each other");
+
+  if (HasMonotonic) {
+    return ScheduleType | OMPScheduleType::ModifierMonotonic;
+  } else if (HasNonmonotonic) {
+    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
+  } else {
+    // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
+    // If the static schedule kind is specified or if the ordered clause is
+    // specified, and if the nonmonotonic modifier is not specified, the
+    // effect is as if the monotonic modifier is specified. Otherwise, unless
+    // the monotonic modifier is specified, the effect is as if the
+    // nonmonotonic modifier is specified.
+    OMPScheduleType BaseScheduleType =
+        ScheduleType & ~OMPScheduleType::ModifierMask;
+    if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
+        (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
+        HasOrderedClause) {
+      // The monotonic is used by default in openmp runtime library, so no need
+      // to set it.
+      return ScheduleType;
+    } else {
+      return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
+    }
+  }
+}
+
+/// Determine the schedule type using schedule and ordering clause arguments.
+static OMPScheduleType
+computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
+                          bool HasSimdModifier, bool HasMonotonicModifier,
+                          bool HasNonmonotonicModifier, bool HasOrderedClause) {
+  OMPScheduleType BaseSchedule =
+      getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier);
+  OMPScheduleType OrderedSchedule =
+      getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
+  OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
+      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
+      HasNonmonotonicModifier, HasOrderedClause);
+
+  assert(isValidWorkshareLoopScheduleType(Result));
+  return Result;
+}
+
 /// Make \p Source branch to \p Target.
 ///
 /// Handles two situations:

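Put together, computeOpenMPScheduleType maps clause combinations to the runtime encodings seen in the updated tests. The following summary is the editor's, not taken from the patch:

clause combination             resulting schedule type                          value
schedule(static)               UnorderedStatic                                     34
schedule(dynamic, c)           UnorderedDynamicChunked | ModifierNonmonotonic      1073741859
schedule(monotonic: guided)    UnorderedGuidedChunked | ModifierMonotonic          536870948
schedule(guided, simd)         UnorderedGuidedSimd | ModifierNonmonotonic          1073741870
schedule(runtime) + ordered    OrderedRuntime                                      69
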
@@ -1651,8 +1811,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,

   Value *ThreadNum = getOrCreateThreadID(SrcLoc);

-  Constant *SchedulingType =
-      ConstantInt::get(I32Type, static_cast<int>(OMPScheduleType::Static));
+  Constant *SchedulingType = ConstantInt::get(
+      I32Type, static_cast<int>(OMPScheduleType::UnorderedStatic));

   // Call the "init" function and update the trip count of the loop with the
   // value it produced.

@@ -1738,7 +1898,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
       Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");

   Constant *SchedulingType = ConstantInt::get(
-      I32Type, static_cast<int>(OMPScheduleType::StaticChunked));
+      I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
   Builder.CreateStore(Zero, PLowerBound);
   Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
   Builder.CreateStore(OrigUpperBound, PUpperBound);

@@ -1836,42 +1996,56 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
   return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
 }

-OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
-                                    InsertPointTy AllocaIP, bool NeedsBarrier,
-                                    llvm::omp::ScheduleKind SchedKind,
-                                    llvm::Value *ChunkSize) {
-  switch (SchedKind) {
-  case llvm::omp::ScheduleKind::OMP_SCHEDULE_Default:
-    assert(!ChunkSize && "No chunk size with default schedule (which for clang "
-                         "is static non-chunked)");
-    LLVM_FALLTHROUGH;
-  case llvm::omp::ScheduleKind::OMP_SCHEDULE_Static:
-    if (ChunkSize)
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
+    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+    bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
+    llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier,
+    bool HasNonmonotonicModifier, bool HasOrderedClause) {
+  OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
+      SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
+      HasNonmonotonicModifier, HasOrderedClause);
+
+  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
+                   OMPScheduleType::ModifierOrdered;
+  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
+  case OMPScheduleType::BaseStatic:
+    assert(!ChunkSize && "No chunk size with static-chunked schedule");
+    if (IsOrdered)
+      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
+                                       NeedsBarrier, ChunkSize);
+    // FIXME: Monotonicity ignored?
+    return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
+
+  case OMPScheduleType::BaseStaticChunked:
+    if (IsOrdered)
+      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
+                                       NeedsBarrier, ChunkSize);
+    // FIXME: Monotonicity ignored?
       return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
                                              ChunkSize);
-    return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
-  case llvm::omp::ScheduleKind::OMP_SCHEDULE_Auto:
-    assert(!ChunkSize && "Chunk size with auto scheduling not user-defined");
-    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, OMPScheduleType::Auto,
-                                     NeedsBarrier, nullptr);
-  case llvm::omp::ScheduleKind::OMP_SCHEDULE_Dynamic:
-    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP,
-                                     OMPScheduleType::DynamicChunked,
-                                     NeedsBarrier, ChunkSize);
-  case llvm::omp::ScheduleKind::OMP_SCHEDULE_Guided:
-    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP,
-                                     OMPScheduleType::GuidedChunked,
-                                     NeedsBarrier, ChunkSize);
-  case llvm::omp::ScheduleKind::OMP_SCHEDULE_Runtime:
-    assert(!ChunkSize &&
-           "Chunk size with runtime scheduling implied to be one");
-    return applyDynamicWorkshareLoop(
-        DL, CLI, AllocaIP, OMPScheduleType::Runtime, NeedsBarrier, nullptr);
-  }
+
+  case OMPScheduleType::BaseRuntime:
+  case OMPScheduleType::BaseAuto:
+  case OMPScheduleType::BaseGreedy:
+  case OMPScheduleType::BaseBalanced:
+  case OMPScheduleType::BaseSteal:
+  case OMPScheduleType::BaseGuidedSimd:
+  case OMPScheduleType::BaseRuntimeSimd:
+    assert(!ChunkSize &&
+           "schedule type does not support user-defined chunk sizes");
+    LLVM_FALLTHROUGH;
+  case OMPScheduleType::BaseDynamicChunked:
+  case OMPScheduleType::BaseGuidedChunked:
+  case OMPScheduleType::BaseGuidedIterativeChunked:
+  case OMPScheduleType::BaseGuidedAnalyticalChunked:
+  case OMPScheduleType::BaseStaticBalancedChunked:
+    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
+                                     NeedsBarrier, ChunkSize);
+
+  default:
     llvm_unreachable("Unknown/unimplemented schedule kind");
+  }
 }

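The practical effect of the new dispatch, summarized by the editor (not text from the patch): static schedules without an ordered clause keep the specialized static lowering, while everything else, including ordered static, goes through the dynamic-dispatch lowering.

// Editor's illustration of the routing above (assumed examples, not from the diff):
//   schedule(static, 4)               -> BaseStaticChunked, not ordered
//                                        -> applyStaticChunkedWorkshareLoop
//                                           (__kmpc_for_static_init_* path)
//   schedule(static, 4) with ordered  -> BaseStaticChunked, ordered
//                                        -> applyDynamicWorkshareLoop with
//                                           OrderedStaticChunked (65)
//   schedule(guided)                  -> BaseGuidedChunked
//                                        -> applyDynamicWorkshareLoop
//                                           (__kmpc_dispatch_init_* path)
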
@@ -1922,10 +2096,15 @@ getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {

 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
-    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk, bool Ordered) {
+    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
          "Require dedicated allocate IP");
+  assert(isValidWorkshareLoopScheduleType(SchedType) &&
+         "Require valid schedule type");
+
+  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
+                 OMPScheduleType::ModifierOrdered;

   // Set up the source location value for OpenMP runtime.
   Builder.SetCurrentDebugLocation(DL);

@@ -4276,10 +4276,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
         unsigned ScheduleTypeVal =
             ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
         switch (OMPScheduleType(ScheduleTypeVal)) {
-        case OMPScheduleType::Static:
-        case OMPScheduleType::StaticChunked:
-        case OMPScheduleType::Distribute:
-        case OMPScheduleType::DistributeChunked:
+        case OMPScheduleType::UnorderedStatic:
+        case OMPScheduleType::UnorderedStaticChunked:
+        case OMPScheduleType::OrderedDistribute:
+        case OMPScheduleType::OrderedDistributeChunked:
           break;
         default:
           SPMDCompatibilityTracker.indicatePessimisticFixpoint();

@@ -140,6 +140,21 @@ static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID,
   return Calls.front();
 }

+static omp::ScheduleKind getSchedKind(omp::OMPScheduleType SchedType) {
+  switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
+  case omp::OMPScheduleType::BaseDynamicChunked:
+    return omp::OMP_SCHEDULE_Dynamic;
+  case omp::OMPScheduleType::BaseGuidedChunked:
+    return omp::OMP_SCHEDULE_Guided;
+  case omp::OMPScheduleType::BaseAuto:
+    return omp::OMP_SCHEDULE_Auto;
+  case omp::OMPScheduleType::BaseRuntime:
+    return omp::OMP_SCHEDULE_Runtime;
+  default:
+    llvm_unreachable("unknown type for this test");
+  }
+}
+
 class OpenMPIRBuilderTest : public testing::Test {
 protected:
   void SetUp() override {

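For the parameterized tests below, the new getSchedKind helper recovers the clause kind from a full schedule-type value by masking off the modifier bits. As an editor's illustration (a fragment, assuming the test's using-declarations):

// UnorderedGuidedChunked | ModifierMonotonic strips down to BaseGuidedChunked,
// so the helper returns OMP_SCHEDULE_Guided for that test parameter.
omp::ScheduleKind K = getSchedKind(omp::OMPScheduleType::UnorderedGuidedChunked |
                                   omp::OMPScheduleType::ModifierMonotonic);
// K == omp::OMP_SCHEDULE_Guided
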
@@ -1898,7 +1913,8 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
   InsertPointTy AllocaIP = Builder.saveIP();

-  OMPBuilder.applyStaticWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true);
+  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
+                                OMP_SCHEDULE_Static);

   BasicBlock *Cond = Body->getSinglePredecessor();
   Instruction *Cmp = &*Cond->begin();

@@ -1989,8 +2005,8 @@ TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
   Value *ChunkSize = ConstantInt::get(LCTy, 5);
   InsertPointTy AllocaIP{&F->getEntryBlock(),
                          F->getEntryBlock().getFirstInsertionPt()};
-  OMPBuilder.applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP,
-                                             /*NeedsBarrier=*/true, ChunkSize);
+  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
+                                OMP_SCHEDULE_Static, ChunkSize);

   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));

@@ -2056,13 +2072,13 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {

   omp::OMPScheduleType SchedType = GetParam();
   uint32_t ChunkSize = 1;
-  switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
-  case omp::OMPScheduleType::DynamicChunked:
-  case omp::OMPScheduleType::GuidedChunked:
+  switch (SchedType & ~OMPScheduleType::ModifierMask) {
+  case omp::OMPScheduleType::BaseDynamicChunked:
+  case omp::OMPScheduleType::BaseGuidedChunked:
     ChunkSize = 7;
     break;
-  case omp::OMPScheduleType::Auto:
-  case omp::OMPScheduleType::Runtime:
+  case omp::OMPScheduleType::BaseAuto:
+  case omp::OMPScheduleType::BaseRuntime:
     ChunkSize = 1;
     break;
   default:

@@ -2074,7 +2090,8 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   Value *StartVal = ConstantInt::get(LCTy, 10);
   Value *StopVal = ConstantInt::get(LCTy, 52);
   Value *StepVal = ConstantInt::get(LCTy, 2);
-  Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
+  Value *ChunkVal =
+      (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
   auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};

   CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(

@@ -2092,10 +2109,15 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();

-  InsertPointTy EndIP =
-      OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
-                                           /*NeedsBarrier=*/true, ChunkVal,
+  InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
+      DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
+      ChunkVal, /*Simd=*/false,
+      (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
+          omp::OMPScheduleType::ModifierMonotonic,
+      (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
+          omp::OMPScheduleType::ModifierNonmonotonic,
+      /*Ordered=*/false);

   // The returned value should be the "after" point.
   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());

@@ -2133,7 +2155,17 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   EXPECT_EQ(InitCall->arg_size(), 7U);
   EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
-  EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
+  if ((SchedType & OMPScheduleType::MonotonicityMask) ==
+      OMPScheduleType::None) {
+    // Implementation is allowed to add default nonmonotonicity flag
+    EXPECT_EQ(
+        static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()) |
+            OMPScheduleType::ModifierNonmonotonic,
+        SchedType | OMPScheduleType::ModifierNonmonotonic);
+  } else {
+    EXPECT_EQ(static_cast<OMPScheduleType>(SchedVal->getValue().getZExtValue()),
+              SchedType);
+  }

   ConstantInt *OrigLowerBound =
       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());

@@ -2171,20 +2203,21 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {

 INSTANTIATE_TEST_SUITE_P(
     OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
-    ::testing::Values(omp::OMPScheduleType::DynamicChunked,
-                      omp::OMPScheduleType::GuidedChunked,
-                      omp::OMPScheduleType::Auto, omp::OMPScheduleType::Runtime,
-                      omp::OMPScheduleType::DynamicChunked |
+    ::testing::Values(omp::OMPScheduleType::UnorderedDynamicChunked,
+                      omp::OMPScheduleType::UnorderedGuidedChunked,
+                      omp::OMPScheduleType::UnorderedAuto,
+                      omp::OMPScheduleType::UnorderedRuntime,
+                      omp::OMPScheduleType::UnorderedDynamicChunked |
                           omp::OMPScheduleType::ModifierMonotonic,
-                      omp::OMPScheduleType::DynamicChunked |
+                      omp::OMPScheduleType::UnorderedDynamicChunked |
                           omp::OMPScheduleType::ModifierNonmonotonic,
-                      omp::OMPScheduleType::GuidedChunked |
+                      omp::OMPScheduleType::UnorderedGuidedChunked |
                           omp::OMPScheduleType::ModifierMonotonic,
-                      omp::OMPScheduleType::GuidedChunked |
+                      omp::OMPScheduleType::UnorderedGuidedChunked |
                           omp::OMPScheduleType::ModifierNonmonotonic,
-                      omp::OMPScheduleType::Auto |
+                      omp::OMPScheduleType::UnorderedAuto |
                           omp::OMPScheduleType::ModifierMonotonic,
-                      omp::OMPScheduleType::Runtime |
+                      omp::OMPScheduleType::UnorderedRuntime |
                           omp::OMPScheduleType::ModifierMonotonic));

 TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {

@@ -2194,7 +2227,6 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
   IRBuilder<> Builder(BB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

-  omp::OMPScheduleType SchedType = omp::OMPScheduleType::OrderedStaticChunked;
   uint32_t ChunkSize = 1;
   Type *LCTy = Type::getInt32Ty(Ctx);
   Value *StartVal = ConstantInt::get(LCTy, 10);

@@ -2217,10 +2249,11 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
   BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();

-  InsertPointTy EndIP =
-      OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
-                                           /*NeedsBarrier=*/true, ChunkVal,
-                                           /*Ordered=*/true);
+  InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
+      DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal,
+      /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false,
+      /*HasNonmonotonicModifier=*/false,
+      /*HasOrderedClause=*/true);

   // Add a termination to our block and check that it is internally consistent.
   Builder.restoreIP(EndIP);

@@ -2241,7 +2274,8 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
   EXPECT_NE(InitCall, nullptr);
   EXPECT_EQ(InitCall->arg_size(), 7U);
   ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
-  EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
+  EXPECT_EQ(SchedVal->getValue(),
+            static_cast<uint64_t>(OMPScheduleType::OrderedStaticChunked));

   CallInst *FiniCall = dyn_cast<CallInst>(
       &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));

@@ -26,6 +26,25 @@
 using namespace mlir;

 namespace {
+static llvm::omp::ScheduleKind
+convertToScheduleKind(Optional<omp::ClauseScheduleKind> schedKind) {
+  if (!schedKind.hasValue())
+    return llvm::omp::OMP_SCHEDULE_Default;
+  switch (schedKind.getValue()) {
+  case omp::ClauseScheduleKind::Static:
+    return llvm::omp::OMP_SCHEDULE_Static;
+  case omp::ClauseScheduleKind::Dynamic:
+    return llvm::omp::OMP_SCHEDULE_Dynamic;
+  case omp::ClauseScheduleKind::Guided:
+    return llvm::omp::OMP_SCHEDULE_Guided;
+  case omp::ClauseScheduleKind::Auto:
+    return llvm::omp::OMP_SCHEDULE_Auto;
+  case omp::ClauseScheduleKind::Runtime:
+    return llvm::omp::OMP_SCHEDULE_Runtime;
+  }
+  llvm_unreachable("unhandled schedule clause argument");
+}
+
 /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
 /// insertion points for allocas.
 class OpenMPAllocaStackFrame

@@ -808,92 +827,16 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,

   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);

+  // TODO: Handle doacross loops when the ordered clause has a parameter.
+  bool isOrdered = loop.ordered_val().hasValue();
+  Optional<omp::ScheduleModifier> scheduleModifier = loop.schedule_modifier();
   bool isSimd = loop.simd_modifier();

-  // The orderedVal refers to the value obtained from the ordered[(n)] clause.
-  //   orderedVal == -1: No ordered[(n)] clause specified.
-  //   orderedVal == 0: The ordered clause specified without a parameter.
-  //   orderedVal > 0: The ordered clause specified with a parameter (n).
-  // TODO: Handle doacross loop init when orderedVal is greater than 0.
-  int64_t orderedVal =
-      loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
-  if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) {
-    ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
-                                   !loop.nowait(),
-                                   llvm::omp::OMP_SCHEDULE_Static, chunk);
-  } else {
-    llvm::omp::OMPScheduleType schedType;
-    switch (schedule) {
-    case omp::ClauseScheduleKind::Static:
-      if (loop.schedule_chunk_var())
-        schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
-      else
-        schedType = llvm::omp::OMPScheduleType::OrderedStatic;
-      break;
-    case omp::ClauseScheduleKind::Dynamic:
-      if (orderedVal == 0)
-        schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
-      else
-        schedType = llvm::omp::OMPScheduleType::DynamicChunked;
-      break;
-    case omp::ClauseScheduleKind::Guided:
-      if (orderedVal == 0) {
-        schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
-      } else {
-        if (isSimd)
-          schedType = llvm::omp::OMPScheduleType::GuidedSimd;
-        else
-          schedType = llvm::omp::OMPScheduleType::GuidedChunked;
-      }
-      break;
-    case omp::ClauseScheduleKind::Auto:
-      if (orderedVal == 0)
-        schedType = llvm::omp::OMPScheduleType::OrderedAuto;
-      else
-        schedType = llvm::omp::OMPScheduleType::Auto;
-      break;
-    case omp::ClauseScheduleKind::Runtime:
-      if (orderedVal == 0) {
-        schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
-      } else {
-        if (isSimd)
-          schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
-        else
-          schedType = llvm::omp::OMPScheduleType::Runtime;
-      }
-      break;
-    }
-
-    if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) {
-      switch (*modifier) {
-      case omp::ScheduleModifier::monotonic:
-        schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic;
-        break;
-      case omp::ScheduleModifier::nonmonotonic:
-        schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
-        break;
-      default:
-        // Nothing to do here.
-        break;
-      }
-    } else {
-      // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
-      // If the static schedule kind is specified or if the ordered clause is
-      // specified, and if the nonmonotonic modifier is not specified, the
-      // effect is as if the monotonic modifier is specified. Otherwise, unless
-      // the monotonic modifier is specified, the effect is as if the
-      // nonmonotonic modifier is specified.
-      // The monotonic is used by default in openmp runtime library, so no need
-      // to set it.
-      if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
-            schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
-        schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
-    }
-
-    ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
-                                          schedType, !loop.nowait(), chunk,
-                                          /*ordered*/ orderedVal == 0);
-  }
+  ompBuilder->applyWorkshareLoop(
+      ompLoc.DL, loopInfo, allocaIP, !loop.nowait(),
+      convertToScheduleKind(schedule), chunk, isSimd,
+      scheduleModifier == omp::ScheduleModifier::monotonic,
+      scheduleModifier == omp::ScheduleModifier::nonmonotonic, isOrdered);

   // Continue building IR after the loop. Note that the LoopInfo returned by
   // `collapseLoops` points inside the outermost loop and is intended for

@@ -657,7 +657,7 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(runtime, simd)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 47
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741871
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}

@@ -674,7 +674,7 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(guided, simd)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 46
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741870
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}

@@ -788,7 +788,7 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(dynamic) ordered(0)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 67, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: call void @__kmpc_dispatch_fini_8u
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0

@@ -806,7 +806,7 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(auto) ordered(0)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741894, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 70, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: call void @__kmpc_dispatch_fini_8u
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0

@@ -824,7 +824,7 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(runtime) ordered(0)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741893, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 69, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: call void @__kmpc_dispatch_fini_8u
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0

@@ -842,7 +842,7 @@ llvm.func @body(i64)
 llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(guided) ordered(0)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741892, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 68, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: call void @__kmpc_dispatch_fini_8u
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
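
Editor's note on the ordered-loop constants above (not from the patch): previously the MLIR lowering set the nonmonotonic bit even when an ordered clause was present; with the shared computeOpenMPScheduleType logic, ordered loops keep the plain ordered encoding, since OpenMP 5.1 treats ordered schedules as monotonic. The same pattern applies to guided (68), runtime (69), and auto (70).

// Old value: ordered dynamic with a stray nonmonotonic bit.
static_assert((67u | (1u << 30)) == 1073741891u,
              "OrderedDynamicChunked | ModifierNonmonotonic");
// New value: just the ordered encoding (BaseDynamicChunked | ModifierOrdered).
static_assert((3u | (1u << 6)) == 67u, "OrderedDynamicChunked");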