From f074a6a041936cc5a93f0ca4fb6c2f6ed9895350 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 29 Sep 2021 13:45:07 -0400 Subject: [PATCH] [OpenMP] Add options to change Attributor max iterations in OpenMPOpt This patch adds a new command line option `openmp-opt-max-iterations` that controls the maximum number of iterations the attributor will run for when compiling OpenMP target device code. This patch also adds a remark to indicate when the attributor failed because it did not run for enough iterations. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D110749 --- llvm/lib/Transforms/IPO/Attributor.cpp | 10 ++++++++++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 19 ++++++++++++++----- .../Transforms/OpenMP/remove_globalization.ll | 4 ++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 28493f87f921..97771d83a7ef 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1415,6 +1415,16 @@ void Attributor::runTillFixpoint() { } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations || VerifyMaxFixpointIterations)); + if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) { + auto Remark = [&](OptimizationRemarkMissed ORM) { + return ORM << "Attributor did not reach a fixpoint after " + << ore::NV("Iterations", MaxFixedPointIterations) + << " iterations."; + }; + Function *F = Worklist.front()->getIRPosition().getAssociatedFunction(); + emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark); + } + LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: " << IterationCounter << "/" << MaxFixpointIterations << " iterations\n"); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index a28435ab7808..5f14fe48d0e7 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -109,6 +109,11 @@ static cl::opt<bool>
cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false)); +static cl::opt<unsigned> + SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden, + cl::desc("Maximal number of attributor iterations."), + cl::init(256)); + STATISTIC(NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated"); STATISTIC(NumOpenMPParallelRegionsDeleted, @@ -3348,8 +3353,9 @@ struct AAKernelInfoFunction : AAKernelInfo { if (DisableOpenMPOptStateMachineRewrite) return ChangeStatus::UNCHANGED; - assert(ReachedKnownParallelRegions.isValidState() && - "Custom state machine with invalid parallel region states?"); + // Don't rewrite the state machine if we are not in a valid state. + if (!ReachedKnownParallelRegions.isValidState()) + return ChangeStatus::UNCHANGED; const int InitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; @@ -4583,7 +4589,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { SetVector<Function *> Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; + unsigned MaxFixpointIterations = + (isOpenMPDevice(M)) ? SetFixpointIterations : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixpointIterations, OREGetter, DEBUG_TYPE); @@ -4646,7 +4653,8 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; + unsigned MaxFixpointIterations = + (isOpenMPDevice(M)) ? SetFixpointIterations : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixpointIterations, OREGetter, DEBUG_TYPE); @@ -4716,7 +4724,8 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 
128 : 32; + unsigned MaxFixpointIterations = + (isOpenMPDevice(M)) ? SetFixpointIterations : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixpointIterations, OREGetter, DEBUG_TYPE); diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index e718db3751f4..8ecae7f5af9a 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -1,14 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS +; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -openmp-opt-max-iterations=1 -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-FIXPOINT ; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" +; UTC_ARGS: --disable ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. +; CHECK-FIXPOINT: Attributor did not reach a fixpoint after 1 iterations. +; UTC_ARGS: --enable @S = external local_unnamed_addr global i8*