OpenMPOpt Remarks Support

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D79359
This commit is contained in:
Huber, Joseph 2020-05-13 12:19:02 -05:00 committed by Johannes Doerfert
parent 60f443bb3b
commit 4d4ea9ac59
3 changed files with 264 additions and 4 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/InitializePasses.h"
@ -50,11 +51,14 @@ static constexpr auto TAG = "[" DEBUG_TYPE "]";
namespace {
struct OpenMPOpt {
using OptimizationRemarkGetter =
function_ref<OptimizationRemarkEmitter &(Function *)>;
OpenMPOpt(SmallVectorImpl<Function *> &SCC,
SmallPtrSetImpl<Function *> &ModuleSlice,
CallGraphUpdater &CGUpdater)
CallGraphUpdater &CGUpdater, OptimizationRemarkGetter OREGetter)
: M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
OMPBuilder(M), CGUpdater(CGUpdater) {
OMPBuilder(M), CGUpdater(CGUpdater), OREGetter(OREGetter) {
initializeTypes(M);
initializeRuntimeFunctions();
OMPBuilder.initialize();
@ -178,6 +182,15 @@ private:
LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
<< CI->getCaller()->getName() << "\n");
auto Remark = [&](OptimizationRemark OR) {
return OR << "Parallel region in "
<< ore::NV("OpenMPParallelDelete", CI->getCaller()->getName())
<< " deleted";
};
emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion",
Remark);
CGUpdater.removeCallSite(*CI);
CI->eraseFromParent();
Changed = true;
@ -317,6 +330,15 @@ private:
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
if (!CanBeMoved(*CI))
continue;
auto Remark = [&](OptimizationRemark OR) {
auto newLoc = &*F.getEntryBlock().getFirstInsertionPt();
return OR << "OpenMP runtime call "
<< ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to "
<< ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc());
};
emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark);
CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
ReplVal = CI;
break;
@ -343,6 +365,13 @@ private:
if (!CI || CI == ReplVal || &F != &Caller)
return false;
assert(CI->getCaller() == &F && "Unexpected call!");
auto Remark = [&](OptimizationRemark OR) {
return OR << "OpenMP runtime call "
<< ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated";
};
emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark);
CGUpdater.removeCallSite(*CI);
CI->replaceAllUsesWith(ReplVal);
CI->eraseFromParent();
@ -508,6 +537,29 @@ private:
// TODO: We should attach the attributes defined in OMPKinds.def.
}
/// Emit a remark generically
///
/// This template function can be used to generically emit a remark. The
/// RemarkKind should be one of the following:
/// - OptimizationRemark to indicate a successful optimization attempt
/// - OptimizationRemarkMissed to report a failed optimization attempt
/// - OptimizationRemarkAnalysis to provide additional information about an
/// optimization attempt
///
/// The remark is built using a callback function provided by the caller that
/// takes a RemarkKind as input and returns a RemarkKind.
template <typename RemarkKind,
typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>>
void emitRemark(Instruction *Inst, StringRef RemarkName,
RemarkCallBack &&RemarkCB) {
Function *F = Inst->getParent()->getParent();
auto &ORE = OREGetter(F);
ORE.emit([&]() {
return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst));
});
}
/// The underyling module.
Module &M;
@ -524,6 +576,9 @@ private:
/// the second an optional replacement call.
CallGraphUpdater &CGUpdater;
/// Callback to get an OptimizationRemarkEmitter from a Function *
OptimizationRemarkGetter OREGetter;
/// Map from runtime function kind to the runtime function description.
EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
RuntimeFunction::OMPRTL___last>
@ -550,10 +605,16 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
if (SCC.empty())
return PreservedAnalyses::all();
auto OREGetter = [&C, &CG, &AM](Function *F) -> OptimizationRemarkEmitter & {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);
// TODO: Compute the module slice we are allowed to look at.
OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater);
OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter);
bool Changed = OMPOpt.run();
(void)Changed;
return PreservedAnalyses::all();
@ -601,8 +662,17 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
CGUpdater.initialize(CG, CGSCC);
// Maintain a map of functions to avoid rebuilding the ORE
DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
if (!ORE)
ORE = std::make_unique<OptimizationRemarkEmitter>(F);
return *ORE;
};
// TODO: Compute the module slice we are allowed to look at.
OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater);
OpenMPOpt OMPOpt(SCC, ModuleSlice, CGUpdater, OREGetter);
return OMPOpt.run();
}

View File

@ -0,0 +1,64 @@
; RUN: opt -openmpopt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -passes=openmpopt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
; ModuleID = 'deduplication_remarks.c'
source_filename = "deduplication_remarks.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct.ident_t = type { i32, i32, i32, i32, i8* }
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
; CHECK: remark: deduplication_remarks.c:9:10: OpenMP runtime call __kmpc_global_thread_num moved to deduplication_remarks.c:5:10
; CHECK: remark: deduplication_remarks.c:7:10: OpenMP runtime call __kmpc_global_thread_num deduplicated
; CHECK: remark: deduplication_remarks.c:5:10: OpenMP runtime call __kmpc_global_thread_num deduplicated
define dso_local void @deduplicate() local_unnamed_addr !dbg !14 {
%1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !21
call void @useI32(i32 %1), !dbg !23
%2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !24
call void @useI32(i32 %2), !dbg !25
%3 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !26
call void @useI32(i32 %3), !dbg !27
ret void, !dbg !28
}
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
declare !dbg !4 void @useI32(i32) local_unnamed_addr
declare void @llvm.dbg.value(metadata, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9, !10, !11, !12}
!llvm.ident = !{!13}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "deduplication_remarks.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
!4 = !DISubprogram(name: "useI32", scope: !1, file: !1, line: 1, type: !5, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!8 = !{i32 7, !"Dwarf Version", i32 4}
!9 = !{i32 2, !"Debug Info Version", i32 3}
!10 = !{i32 1, !"wchar_size", i32 4}
!11 = !{i32 7, !"PIC Level", i32 2}
!12 = !{i32 7, !"PIE Level", i32 2}
!13 = !{!"clang version 10.0.0 "}
!14 = distinct !DISubprogram(name: "deduplicate", scope: !1, file: !1, line: 4, type: !15, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !17)
!15 = !DISubroutineType(types: !16)
!16 = !{null}
!17 = !{!18, !19, !20}
!18 = !DILocalVariable(name: "x", scope: !14, file: !1, line: 5, type: !7)
!19 = !DILocalVariable(name: "y", scope: !14, file: !1, line: 7, type: !7)
!20 = !DILocalVariable(name: "z", scope: !14, file: !1, line: 9, type: !7)
!21 = !DILocation(line: 5, column: 10, scope: !14)
!22 = !DILocation(line: 0, scope: !14)
!23 = !DILocation(line: 6, column: 2, scope: !14)
!24 = !DILocation(line: 7, column: 10, scope: !14)
!25 = !DILocation(line: 8, column: 2, scope: !14)
!26 = !DILocation(line: 9, column: 10, scope: !14)
!27 = !DILocation(line: 10, column: 2, scope: !14)
!28 = !DILocation(line: 13, column: 1, scope: !14)

View File

@ -0,0 +1,126 @@
; RUN: opt -S -pass-remarks=openmp-opt -attributor -openmpopt -disable-output < %s 2>&1 | FileCheck %s
; RUN: opt -S -pass-remarks=openmp-opt -passes='attributor,cgscc(openmpopt)' -disable-output < %s 2>&1 | FileCheck %s
; ModuleID = 'parallel_deletion_remarks.ll'
source_filename = "parallel_deletion_remarks.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct.ident_t = type { i32, i32, i32, i32, i8* }
@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
; void delete_parallel(void) {
; #pragma omp parallel
; { unknown_willreturn(); }
; #pragma omp parallel
; { readonly_willreturn(); }
; #pragma omp parallel
; { readnone_willreturn(); }
; #pragma omp parallel
; {}
; }
;
; This will delete all but the first parallel region
; CHECK: remark: parallel_deletion_remarks.c:14:1: Parallel region in delete_parallel deleted
; CHECK: remark: parallel_deletion_remarks.c:12:1: Parallel region in delete_parallel deleted
; CHECK: remark: parallel_deletion_remarks.c:10:1: Parallel region in delete_parallel deleted
define dso_local void @delete_parallel() local_unnamed_addr !dbg !15 {
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg !18
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !19
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)), !dbg !20
call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*)), !dbg !21
ret void, !dbg !22
}
declare !callback !23 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) local_unnamed_addr
; Function Attrs: willreturn
declare !dbg !4 void @unknown_willreturn(...) #0
; Function Attrs: readonly willreturn
declare !dbg !7 void @readonly_willreturn(...) #1
; Function Attrs: readnone willreturn
declare !dbg !8 void @readnone_willreturn(...) #2
define internal void @.omp_outlined.(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !25 {
call void (...) @unknown_willreturn(), !dbg !36
ret void, !dbg !36
}
define internal void @.omp_outlined..2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !37 {
call void (...) @readonly_willreturn(), !dbg !41
ret void, !dbg !41
}
define internal void @.omp_outlined..4(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !42 {
call void (...) @readnone_willreturn(), !dbg !46
ret void, !dbg !46
}
define internal void @.omp_outlined..6(i32* noalias nocapture %0, i32* noalias nocapture %1) !dbg !47 {
ret void, !dbg !51
}
attributes #0 = { willreturn }
attributes #1 = { readonly willreturn }
attributes #2 = { readnone willreturn }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10, !11, !12, !13}
!llvm.ident = !{!14}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "parallel_deletion_remarks.c", directory: "/tmp")
!2 = !{}
!3 = !{!4, !7, !8}
!4 = !DISubprogram(name: "unknown_willreturn", scope: !1, file: !1, line: 3, type: !5, spFlags: DISPFlagOptimized, retainedNodes: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null, null}
!7 = !DISubprogram(name: "readonly_willreturn", scope: !1, file: !1, line: 4, type: !5, spFlags: DISPFlagOptimized, retainedNodes: !2)
!8 = !DISubprogram(name: "readnone_willreturn", scope: !1, file: !1, line: 5, type: !5, spFlags: DISPFlagOptimized, retainedNodes: !2)
!9 = !{i32 7, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{i32 1, !"wchar_size", i32 4}
!12 = !{i32 7, !"PIC Level", i32 2}
!13 = !{i32 7, !"PIE Level", i32 2}
!14 = !{!"clang version 10.0.0 "}
!15 = distinct !DISubprogram(name: "delete_parallel", scope: !1, file: !1, line: 7, type: !16, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!16 = !DISubroutineType(types: !17)
!17 = !{null}
!18 = !DILocation(line: 8, column: 1, scope: !15)
!19 = !DILocation(line: 10, column: 1, scope: !15)
!20 = !DILocation(line: 12, column: 1, scope: !15)
!21 = !DILocation(line: 14, column: 1, scope: !15)
!22 = !DILocation(line: 16, column: 1, scope: !15)
!23 = !{!24}
!24 = !{i64 2, i64 -1, i64 -1, i1 true}
!25 = distinct !DISubprogram(name: ".omp_outlined.", scope: !1, file: !1, line: 9, type: !26, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !33)
!26 = !DISubroutineType(types: !27)
!27 = !{null, !28, !28}
!28 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !29)
!29 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !30)
!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !31, size: 64)
!31 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !32)
!32 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!33 = !{!34, !35}
!34 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !25, type: !28, flags: DIFlagArtificial)
!35 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !25, type: !28, flags: DIFlagArtificial)
!36 = !DILocation(line: 9, column: 2, scope: !25)
!37 = distinct !DISubprogram(name: ".omp_outlined..2", scope: !1, file: !1, line: 11, type: !26, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !38)
!38 = !{!39, !40}
!39 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !37, type: !28, flags: DIFlagArtificial)
!40 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !37, type: !28, flags: DIFlagArtificial)
!41 = !DILocation(line: 11, column: 2, scope: !37)
!42 = distinct !DISubprogram(name: ".omp_outlined..4", scope: !1, file: !1, line: 13, type: !26, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !43)
!43 = !{!44, !45}
!44 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !42, type: !28, flags: DIFlagArtificial)
!45 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !42, type: !28, flags: DIFlagArtificial)
!46 = !DILocation(line: 13, column: 2, scope: !42)
!47 = distinct !DISubprogram(name: ".omp_outlined..6", scope: !1, file: !1, line: 15, type: !26, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !48)
!48 = !{!49, !50}
!49 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !47, type: !28, flags: DIFlagArtificial)
!50 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !47, type: !28, flags: DIFlagArtificial)
!51 = !DILocation(line: 15, column: 2, scope: !47)