[GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics
This is a long-standing problem that resurfaces once in a while [0].
There might actually be two problems; I am not 100% sure whether the
issue underlying https://reviews.llvm.org/D115302 is solved by this
change or not.
In 2008 we started assuming that intrinsics do not read or write globals
passed to them: d4133ac315
This is not correct, since intrinsics can synchronize threads and thereby
make the memory effects of other threads visible.
NOTE: I did not modify any tests yet but only tried out the reproducer
of https://github.com/llvm/llvm-project/issues/54851.
Fixes: https://github.com/llvm/llvm-project/issues/54851
[0] https://discourse.llvm.org/t/bug-gvn-memdep-bug-in-the-presence-of-intrinsics/59402
Differential Revision: https://reviews.llvm.org/D123531
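
To make the issue concrete, here is a minimal sketch in LLVM IR of the kind of pattern that goes wrong (the names @g and @example are made up for illustration; the barrier intrinsic is the one used in the reproducer and in the new test below). Another thread may store to @g before the barrier, so GVN must not forward the store on this thread into the load after the barrier:

@g = internal global i32 0, align 4

define i32 @example(i32 %v) {
  store i32 %v, ptr @g, align 4
  call void @llvm.nvvm.bar.sync(i32 0)   ; synchronizes with the other threads
  %r = load i32, ptr @g, align 4         ; must stay a load, another thread may have written @g
  ret i32 %r
}

declare void @llvm.nvvm.bar.sync(i32)

With the old assumption, globals-aa told GVN that the intrinsic cannot touch @g, so the load could be replaced by %v, which is the miscompile reported in the issue.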
commit 9dc7da3f9c
parent 0f070bee82
llvm/lib/Analysis/GlobalsModRef.cpp

@@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
     Handles.front().I = Handles.begin();
     bool KnowNothing = false;
 
+    // Intrinsics, like any other synchronizing function, can make effects
+    // of other threads visible. Without nosync we know nothing really.
+    // Similarly, if `nocallback` is missing the function, or intrinsic,
+    // can call into the module arbitrarily. If both are set the function
+    // has an effect but will not interact with accesses of internal
+    // globals inside the module. We are conservative here for optnone
+    // functions, might not be necessary.
+    auto MaySyncOrCallIntoModule = [](const Function &F) {
+      return !F.isDeclaration() || !F.hasNoSync() ||
+             !F.hasFnAttribute(Attribute::NoCallback);
+    };
+
     // Collect the mod/ref properties due to called functions. We only compute
     // one mod-ref set.
     for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
@@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           // Can't do better than that!
         } else if (F->onlyReadsMemory()) {
           FI.addModRefInfo(ModRefInfo::Ref);
-          if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
+          if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
             // This function might call back into the module and read a global -
             // consider every global as possibly being read by this function.
             FI.setMayReadAnyGlobal();
@@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
           FI.addModRefInfo(ModRefInfo::ModRef);
           if (!F->onlyAccessesArgMemory())
             FI.setMayReadAnyGlobal();
-          if (!F->isIntrinsic()) {
+          if (MaySyncOrCallIntoModule(*F)) {
             KnowNothing = true;
             break;
           }
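
In IR terms, the new check roughly means that a call to a declared function is only considered harmless for internal globals when the declaration carries both nosync and nocallback; otherwise it is now treated as if it could read or write them. A minimal sketch, using hypothetical declarations @ext_plain and @ext_safe (the new test at the end of this patch exercises the same cases via @unknown and @unknown_nosync_nocallback):

@g = internal global i32 0

declare void @ext_plain()                  ; may sync or call back into the module
declare void @ext_safe() nosync nocallback ; may neither sync nor call back into the module

define i32 @caller() {
  store i32 1, ptr @g
  call void @ext_safe()    ; globals-aa can still conclude this call does not modify @g
  %a = load i32, ptr @g    ; GVN may forward the stored 1 across the call above
  call void @ext_plain()   ; conservatively assumed to read/write @g
  %b = load i32, ptr @g    ; must remain a load
  %r = add i32 %a, %b
  ret i32 %r
}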
@@ -0,0 +1,38 @@
; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
;
; Functions w/o `nosync` attribute may communicate via memory and must be
; treated conservatively. Taken from https://reviews.llvm.org/D115302.

target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4

; CHECK-LABEL: @bar_sync
; CHECK: store
; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
; CHECK: load
define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
  tail call void @llvm.nvvm.bar.sync(i32 0)
  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
  ret i32 %2
}

declare void @llvm.nvvm.bar.sync(i32) #0

; CHECK-LABEL: @barrier0
; CHECK: store
; CHECK: tail call void @llvm.nvvm.barrier0()
; CHECK: load
define dso_local i32 @barrier0(i32 %0) local_unnamed_addr {
  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
  tail call void @llvm.nvvm.barrier0()
  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
  ret i32 %2
}

declare void @llvm.nvvm.barrier0() #0

attributes #0 = { convergent nounwind }
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -18,10 +18,10 @@ if.else: ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 ; CHECK-NOT: load i8, i8* @deallocCalled
 if.end: ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -35,10 +35,10 @@ if.end6: ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
 
@@ -1,4 +1,4 @@
-; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -14,7 +14,7 @@ entry:
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -24,10 +24,10 @@ if.else: ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 ; CHECK-NOT: load i8, i8* @deallocCalled
 if.end: ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -41,10 +41,10 @@ if.end6: ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
-attributes #0 = { noreturn nounwind }
-attributes #1 = { nounwind }
+attributes #0 = { noreturn nosync nounwind }
+attributes #1 = { nosync nounwind }
 
@@ -18,7 +18,7 @@ entry:
 ; CHECK-LABEL: @main()
 define dso_local i32 @main() {
 entry:
-  %tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
+  %tmp0 = call i8* @llvm.stacksave() #1
   %tmp6 = load i8, i8* @deallocCalled, align 1
   %tobool = icmp ne i8 %tmp6, 0
   br i1 %tobool, label %if.else, label %if.end
@@ -28,9 +28,9 @@ if.else: ; preds = %entry
   unreachable
 
 ; CHECK-LABEL: if.end:
-; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
+; CHECK-NEXT: call void @llvm.stackrestore
 if.end: ; preds = %entry
-  call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
+  call void @llvm.stackrestore(i8* %tmp0)
   %tmp7 = load i8, i8* @deallocCalled, align 1
   %tobool3 = icmp ne i8 %tmp7, 0
   br i1 %tobool3, label %if.end6, label %if.else5
@@ -44,8 +44,8 @@ if.end6: ; preds = %if.end
   ret i32 0
 }
 
-declare i8* @llvm.objc.autoreleasePoolPush() #1
-declare void @llvm.objc.autoreleasePoolPop(i8*) #1
+declare i8* @llvm.stacksave() #1
+declare void @llvm.stackrestore(i8*) #1
 declare dso_local void @__assert_fail() #0
 
 attributes #0 = { noreturn nounwind }
@@ -0,0 +1,133 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s

; Make sure we do not hoist the load before the intrinsic, unknown function, or
; optnone function except if we know the unknown function is nosync and nocallback.

@G1 = internal global i32 undef
@G2 = internal global i32 undef
@G3 = internal global i32 undef
@G4 = internal global i32 undef

define void @test_barrier(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_barrier
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G1, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G1, align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
  br i1 %c, label %init, label %check
init:
  store i32 0, ptr @G1
  br label %check
check:
  call void @llvm.amdgcn.s.barrier()
  %v = load i32, ptr @G1
  %cmp = icmp eq i32 %v, 0
  call void @llvm.assume(i1 %cmp)
  ret void
}

define void @test_unknown(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_unknown
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G2, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G2, align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
  br i1 %c, label %init, label %check
init:
  store i32 0, ptr @G2
  br label %check
check:
  call void @unknown()
  %v = load i32, ptr @G2
  %cmp = icmp eq i32 %v, 0
  call void @llvm.assume(i1 %cmp)
  ret void
}

define void @test_optnone(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_optnone
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G3, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: call void @optnone()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G3, align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
  br i1 %c, label %init, label %check
init:
  store i32 0, ptr @G3
  br label %check
check:
  call void @optnone()
  %v = load i32, ptr @G3
  %cmp = icmp eq i32 %v, 0
  call void @llvm.assume(i1 %cmp)
  ret void
}

define void @optnone() optnone nosync nocallback noinline {
; CHECK: Function Attrs: nocallback noinline nosync optnone
; CHECK-LABEL: define {{[^@]+}}@optnone
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; Here hoisting is legal and we use it to verify it will happen.
define void @test_unknown_annotated(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]]
; CHECK: .check_crit_edge:
; CHECK-NEXT: [[V_PRE:%.*]] = load i32, ptr @G4, align 4
; CHECK-NEXT: br label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G4, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ]
; CHECK-NEXT: call void @unknown_nosync_nocallback()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
  br i1 %c, label %init, label %check
init:
  store i32 0, ptr @G4
  br label %check
check:
  call void @unknown_nosync_nocallback()
  %v = load i32, ptr @G4
  %cmp = icmp eq i32 %v, 0
  call void @llvm.assume(i1 %cmp)
  ret void
}

declare void @unknown()
declare void @unknown_nosync_nocallback() nosync nocallback
declare void @llvm.amdgcn.s.barrier()
declare void @llvm.assume(i1 noundef)