forked from OSchip/llvm-project
[OpenMP] Add an option to change Attributor max iterations in OpenMPOpt
This patch adds a new command-line option, `openmp-opt-max-iterations`, that controls the maximum number of iterations the Attributor runs when compiling OpenMP target device code. It also adds a missed-optimization remark that is emitted when the Attributor stops at that iteration limit without reaching a fixpoint. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D110749
This commit is contained in:
parent
bf30c48419
commit
f074a6a041
llvm
|
@ -1415,6 +1415,16 @@ void Attributor::runTillFixpoint() {
|
|||
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
|
||||
VerifyMaxFixpointIterations));
|
||||
|
||||
if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
|
||||
auto Remark = [&](OptimizationRemarkMissed ORM) {
|
||||
return ORM << "Attributor did not reach a fixpoint after "
|
||||
<< ore::NV("Iterations", MaxFixedPointIterations)
|
||||
<< " iterations.";
|
||||
};
|
||||
Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
|
||||
emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
|
||||
<< IterationCounter << "/" << MaxFixpointIterations
|
||||
<< " iterations\n");
|
||||
|
|
|
@ -109,6 +109,11 @@ static cl::opt<bool>
|
|||
cl::desc("Enables more verbose remarks."), cl::Hidden,
|
||||
cl::init(false));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
|
||||
cl::desc("Maximal number of attributor iterations."),
|
||||
cl::init(256));
|
||||
|
||||
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
|
||||
"Number of OpenMP runtime calls deduplicated");
|
||||
STATISTIC(NumOpenMPParallelRegionsDeleted,
|
||||
|
@ -3348,8 +3353,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
|||
if (DisableOpenMPOptStateMachineRewrite)
|
||||
return ChangeStatus::UNCHANGED;
|
||||
|
||||
assert(ReachedKnownParallelRegions.isValidState() &&
|
||||
"Custom state machine with invalid parallel region states?");
|
||||
// Don't rewrite the state machine if we are not in a valid state.
|
||||
if (!ReachedKnownParallelRegions.isValidState())
|
||||
return ChangeStatus::UNCHANGED;
|
||||
|
||||
const int InitModeArgNo = 1;
|
||||
const int InitUseStateMachineArgNo = 2;
|
||||
|
@ -4583,7 +4589,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
|
|||
SetVector<Function *> Functions(SCC.begin(), SCC.end());
|
||||
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
|
||||
|
||||
unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
|
||||
unsigned MaxFixpointIterations =
|
||||
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
|
||||
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
|
||||
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
|
||||
|
||||
|
@ -4646,7 +4653,8 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
|
|||
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
|
||||
/*CGSCC*/ Functions, Kernels);
|
||||
|
||||
unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
|
||||
unsigned MaxFixpointIterations =
|
||||
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
|
||||
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
|
||||
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
|
||||
|
||||
|
@ -4716,7 +4724,8 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
|
|||
Allocator,
|
||||
/*CGSCC*/ Functions, Kernels);
|
||||
|
||||
unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
|
||||
unsigned MaxFixpointIterations =
|
||||
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
|
||||
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
|
||||
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
|
||||
|
||||
|
|
|
@ -1,14 +1,18 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
|
||||
; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
|
||||
; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -openmp-opt-max-iterations=1 -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-FIXPOINT
|
||||
; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED
|
||||
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64"
|
||||
|
||||
; UTC_ARGS: --disable
|
||||
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
|
||||
; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
|
||||
; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack.
|
||||
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
|
||||
; CHECK-FIXPOINT: Attributor did not reach a fixpoint after 1 iterations.
|
||||
; UTC_ARGS: --enable
|
||||
|
||||
@S = external local_unnamed_addr global i8*
|
||||
|
||||
|
|
Loading…
Reference in New Issue