[OpenMP] Add option to change Attributor max iterations in OpenMPOpt

This patch adds a new command line option, `openmp-opt-max-iterations`,
that controls the maximum number of iterations the Attributor will run
when compiling OpenMP target device code. The default for device code is
raised from the previously hard-coded 128 to 256; host code still uses 32.
This patch also adds a missed-optimization remark that is emitted when the
Attributor fails to reach a fixpoint because it did not run for enough
iterations.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D110749
This commit is contained in:
Joseph Huber 2021-09-29 13:45:07 -04:00
parent bf30c48419
commit f074a6a041
3 changed files with 28 additions and 5 deletions

View File

@ -1415,6 +1415,16 @@ void Attributor::runTillFixpoint() {
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations || } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations)); VerifyMaxFixpointIterations));
if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
auto Remark = [&](OptimizationRemarkMissed ORM) {
return ORM << "Attributor did not reach a fixpoint after "
<< ore::NV("Iterations", MaxFixedPointIterations)
<< " iterations.";
};
Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
}
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations << IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n"); << " iterations\n");

View File

@ -109,6 +109,11 @@ static cl::opt<bool>
cl::desc("Enables more verbose remarks."), cl::Hidden, cl::desc("Enables more verbose remarks."), cl::Hidden,
cl::init(false)); cl::init(false));
static cl::opt<unsigned>
SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
cl::desc("Maximal number of attributor iterations."),
cl::init(256));
STATISTIC(NumOpenMPRuntimeCallsDeduplicated, STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated"); "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted, STATISTIC(NumOpenMPParallelRegionsDeleted,
@ -3348,8 +3353,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (DisableOpenMPOptStateMachineRewrite) if (DisableOpenMPOptStateMachineRewrite)
return ChangeStatus::UNCHANGED; return ChangeStatus::UNCHANGED;
assert(ReachedKnownParallelRegions.isValidState() && // Don't rewrite the state machine if we are not in a valid state.
"Custom state machine with invalid parallel region states?"); if (!ReachedKnownParallelRegions.isValidState())
return ChangeStatus::UNCHANGED;
const int InitModeArgNo = 1; const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2; const int InitUseStateMachineArgNo = 2;
@ -4583,7 +4589,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
SetVector<Function *> Functions(SCC.begin(), SCC.end()); SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
MaxFixpointIterations, OREGetter, DEBUG_TYPE); MaxFixpointIterations, OREGetter, DEBUG_TYPE);
@ -4646,7 +4653,8 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, Kernels); /*CGSCC*/ Functions, Kernels);
unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE); MaxFixpointIterations, OREGetter, DEBUG_TYPE);
@ -4716,7 +4724,8 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
Allocator, Allocator,
/*CGSCC*/ Functions, Kernels); /*CGSCC*/ Functions, Kernels);
unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE); MaxFixpointIterations, OREGetter, DEBUG_TYPE);

View File

@ -1,14 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -openmp-opt-max-iterations=1 -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-FIXPOINT
; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED ; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64" target triple = "nvptx64"
; UTC_ARGS: --disable
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack.
; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
; CHECK-FIXPOINT: Attributor did not reach a fixpoint after 1 iterations.
; UTC_ARGS: --enable
@S = external local_unnamed_addr global i8* @S = external local_unnamed_addr global i8*