forked from OSchip/llvm-project
[OpenMP][FIX] Do not crash when kernels are debug wrapper functions
With debug information enabled (-g), Clang wraps the actual target region in a new function that is called from the "kernel". The problem is that the "kernel" is now basically a wrapper without all the things we expect. More importantly, if we end up asking for an AAKernelInfo for the "target region function", we might try to turn it into SPMD mode. That used to trigger an assertion failure because that function does not have an appropriately named `_exec_mode` global. While the global is going away soon, we still need to handle this case properly so that, e.g., optimizations are performed reliably. Differential Revision: https://reviews.llvm.org/D122043
This commit is contained in:
parent
d155c7da51
commit
4166738c38
|
@ -3168,12 +3168,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
|||
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
|
||||
|
||||
Function *Fn = getAnchorScope();
|
||||
if (!OMPInfoCache.Kernels.count(Fn))
|
||||
return;
|
||||
|
||||
// Add itself to the reaching kernel and set IsKernelEntry.
|
||||
ReachingKernelEntries.insert(Fn);
|
||||
IsKernelEntry = true;
|
||||
|
||||
OMPInformationCache::RuntimeFunctionInfo &InitRFI =
|
||||
OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
|
||||
|
@ -3207,10 +3201,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
|||
Fn);
|
||||
|
||||
// Ignore kernels without initializers such as global constructors.
|
||||
if (!KernelInitCB || !KernelDeinitCB) {
|
||||
indicateOptimisticFixpoint();
|
||||
if (!KernelInitCB || !KernelDeinitCB)
|
||||
return;
|
||||
}
|
||||
|
||||
// Add itself to the reaching kernel and set IsKernelEntry.
|
||||
ReachingKernelEntries.insert(Fn);
|
||||
IsKernelEntry = true;
|
||||
|
||||
// For kernels we might need to initialize/finalize the IsSPMD state and
|
||||
// we need to register a simplification callback so that the Attributor
|
||||
|
@ -3376,8 +3372,17 @@ struct AAKernelInfoFunction : AAKernelInfo {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Check if the kernel is already in SPMD mode, if so, return success.
|
||||
// Get the actual kernel, could be the caller of the anchor scope if we have
|
||||
// a debug wrapper.
|
||||
Function *Kernel = getAnchorScope();
|
||||
if (Kernel->hasLocalLinkage()) {
|
||||
assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
|
||||
auto *CB = cast<CallBase>(Kernel->user_back());
|
||||
Kernel = CB->getCaller();
|
||||
}
|
||||
assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
|
||||
|
||||
// Check if the kernel is already in SPMD mode, if so, return success.
|
||||
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
|
||||
(Kernel->getName() + "_exec_mode").str());
|
||||
assert(ExecMode && "Kernel without exec mode?");
|
||||
|
@ -4731,18 +4736,23 @@ void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
|
|||
|
||||
void OpenMPOpt::registerAAs(bool IsModulePass) {
|
||||
if (SCC.empty())
|
||||
|
||||
return;
|
||||
|
||||
if (IsModulePass) {
|
||||
// Ensure we create the AAKernelInfo AAs first and without triggering an
|
||||
// update. This will make sure we register all value simplification
|
||||
// callbacks before any other AA has the chance to create an AAValueSimplify
|
||||
// or similar.
|
||||
for (Function *Kernel : OMPInfoCache.Kernels)
|
||||
auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
|
||||
A.getOrCreateAAFor<AAKernelInfo>(
|
||||
IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
|
||||
IRPosition::function(Kernel), /* QueryingAA */ nullptr,
|
||||
DepClassTy::NONE, /* ForceUpdate */ false,
|
||||
/* UpdateAfterInit */ false);
|
||||
return false;
|
||||
};
|
||||
OMPInformationCache::RuntimeFunctionInfo &InitRFI =
|
||||
OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
|
||||
InitRFI.foreachUse(SCC, CreateKernelInfoCB);
|
||||
|
||||
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
|
||||
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
|
||||
|
|
|
@ -18,9 +18,12 @@ target triple = "nvptx64"
|
|||
@S = external local_unnamed_addr global i8*
|
||||
@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
|
||||
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
|
||||
@foo_exec_mode = weak constant i8 1
|
||||
@bar_exec_mode = weak constant i8 1
|
||||
@baz_spmd_exec_mode = weak constant i8 2
|
||||
|
||||
|
||||
define dso_local void @foo() {
|
||||
define dso_local void @foo() "kernel" {
|
||||
entry:
|
||||
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
|
||||
%x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
|
||||
|
@ -33,7 +36,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @bar() {
|
||||
define void @bar() "kernel" {
|
||||
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
|
||||
call void @unknown_no_openmp()
|
||||
%cmp = icmp eq i32 %c, -1
|
||||
|
@ -61,7 +64,7 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @baz_spmd() {
|
||||
define void @baz_spmd() "kernel" {
|
||||
%c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true, i1 true)
|
||||
call void @unknown_no_openmp()
|
||||
%c0 = icmp eq i32 %c, -1
|
||||
|
@ -110,7 +113,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!3, !4, !5, !6}
|
||||
!nvvm.annotations = !{!7, !8}
|
||||
!nvvm.annotations = !{!7, !8, !13}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
|
||||
|
@ -130,58 +133,64 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
|
||||
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
|
||||
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
|
||||
; CHECK: @[[FOO_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
|
||||
; CHECK: @[[BAR_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
|
||||
; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
|
||||
; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef
|
||||
; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef
|
||||
; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
|
||||
; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
|
||||
;.
|
||||
; CHECK-LABEL: define {{[^@]+}}@foo() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@foo
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
|
||||
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]]
|
||||
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR7:[0-9]+]]
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR6:[0-9]+]]
|
||||
; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[X_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR7:[0-9]+]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR8:[0-9]+]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR9:[0-9]+]]
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define {{[^@]+}}@bar() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@bar
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]]
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR6]]
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
|
||||
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: master1:
|
||||
; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*) to [4 x i32]*
|
||||
; CHECK-NEXT: [[A0:%.*]] = bitcast [4 x i32]* [[X_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR7]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR8]]
|
||||
; CHECK-NEXT: br label [[NEXT:%.*]]
|
||||
; CHECK: next:
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]]
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR6]]
|
||||
; CHECK-NEXT: br label [[MASTER2:%.*]]
|
||||
; CHECK: master2:
|
||||
; CHECK-NEXT: [[Y_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*) to [4 x i32]*
|
||||
; CHECK-NEXT: [[B1:%.*]] = bitcast [4 x i32]* [[Y_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR7]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR8]]
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define {{[^@]+}}@baz_spmd() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@baz_spmd
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true, i1 true)
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]]
|
||||
; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR6]]
|
||||
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
|
||||
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: master3:
|
||||
; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG9:![0-9]+]]
|
||||
; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR7]], !dbg [[DBG10:![0-9]+]]
|
||||
; CHECK-NEXT: [[Z_ON_STACK:%.*]] = bitcast i8* [[Z]] to [6 x i32]*
|
||||
; CHECK-NEXT: [[C1:%.*]] = bitcast [6 x i32]* [[Z_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR7]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR8]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR8]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR9]]
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
|
||||
|
@ -190,7 +199,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
;
|
||||
; CHECK: Function Attrs: nofree nounwind writeonly
|
||||
; CHECK-LABEL: define {{[^@]+}}@use.internalized
|
||||
; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: store i8* [[X]], i8** @S, align 8
|
||||
; CHECK-NEXT: ret void
|
||||
|
@ -204,21 +213,22 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
;
|
||||
;
|
||||
; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared
|
||||
; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: [[L:%.*]] = load i32, i32* @offset, align 4
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]]
|
||||
; CHECK-NEXT: ret i8* [[GEP]]
|
||||
;
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { nofree nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR1]] = { nosync nounwind readonly allocsize(0) }
|
||||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind }
|
||||
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind readnone speculatable }
|
||||
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
|
||||
; CHECK: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" }
|
||||
; CHECK: attributes #[[ATTR6]] = { nounwind readonly }
|
||||
; CHECK: attributes #[[ATTR7]] = { nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR8]] = { nounwind }
|
||||
; CHECK: attributes #[[ATTR0]] = { "kernel" }
|
||||
; CHECK: attributes #[[ATTR1]] = { nofree nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR2]] = { nosync nounwind readonly allocsize(0) }
|
||||
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
|
||||
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind readnone speculatable }
|
||||
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
|
||||
; CHECK: attributes #[[ATTR6]] = { "llvm.assume"="omp_no_openmp" }
|
||||
; CHECK: attributes #[[ATTR7]] = { nounwind readonly }
|
||||
; CHECK: attributes #[[ATTR8]] = { nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR9]] = { nounwind }
|
||||
;.
|
||||
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
|
||||
|
@ -229,7 +239,8 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
|
||||
; CHECK: [[META7:![0-9]+]] = !{void ()* @foo, !"kernel", i32 1}
|
||||
; CHECK: [[META8:![0-9]+]] = !{void ()* @bar, !"kernel", i32 1}
|
||||
; CHECK: [[DBG9]] = !DILocation(line: 5, column: 14, scope: !10)
|
||||
; CHECK: [[META10:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
; CHECK: [[META11:![0-9]+]] = !DISubroutineType(types: !2)
|
||||
; CHECK: [[META9:![0-9]+]] = !{void ()* @baz_spmd, !"kernel", i32 1}
|
||||
; CHECK: [[DBG10]] = !DILocation(line: 5, column: 14, scope: !11)
|
||||
; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
|
||||
; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2)
|
||||
;.
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue