[GlobalsModRef][FIX] Ensure we honor synchronizing effects of intrinsics

This is a long standing problem that resurfaces once in a while [0].
There may actually be two distinct problems here: it is not entirely clear
whether the issue underlying https://reviews.llvm.org/D115302 is also
resolved by this change.

In 2008 we thought intrinsics do not read/write globals passed to them:
d4133ac315
This is not correct given that intrinsics can synchronize threads and
cause effects to effectively become visible.

NOTE: I did not yet modify any tests but only tried out the reproducer
      of https://github.com/llvm/llvm-project/issues/54851.

Fixes: https://github.com/llvm/llvm-project/issues/54851

[0] https://discourse.llvm.org/t/bug-gvn-memdep-bug-in-the-presence-of-intrinsics/59402

Differential Revision: https://reviews.llvm.org/D123531
This commit is contained in:
Johannes Doerfert 2022-04-11 13:32:22 -05:00
parent 0f070bee82
commit 9dc7da3f9c
6 changed files with 206 additions and 23 deletions

View File

@ -511,6 +511,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
Handles.front().I = Handles.begin();
bool KnowNothing = false;
// Intrinsics, like any other synchronizing function, can make effects
// of other threads visible. Without nosync we know nothing really.
// Similarly, if `nocallback` is missing, the function (or intrinsic)
// can call into the module arbitrarily. If both are set the function
// has an effect but will not interact with accesses of internal
// globals inside the module. We are conservative here for optnone
// functions, might not be necessary.
// Note: any *defined* function (!isDeclaration) is conservatively treated
// as if it may sync or call back into the module; only declarations
// carrying both `nosync` and `nocallback` are exempt.
auto MaySyncOrCallIntoModule = [](const Function &F) {
return !F.isDeclaration() || !F.hasNoSync() ||
!F.hasFnAttribute(Attribute::NoCallback);
};
// Collect the mod/ref properties due to called functions. We only compute
// one mod-ref set.
for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
@ -525,7 +537,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Can't do better than that!
} else if (F->onlyReadsMemory()) {
FI.addModRefInfo(ModRefInfo::Ref);
if (!F->isIntrinsic() && !F->onlyAccessesArgMemory())
if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
// This function might call back into the module and read a global -
// consider every global as possibly being read by this function.
FI.setMayReadAnyGlobal();
@ -533,7 +545,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FI.addModRefInfo(ModRefInfo::ModRef);
if (!F->onlyAccessesArgMemory())
FI.setMayReadAnyGlobal();
if (!F->isIntrinsic()) {
if (MaySyncOrCallIntoModule(*F)) {
KnowNothing = true;
break;
}

View File

@ -0,0 +1,38 @@
; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
;
; Functions w/o `nosync` attribute may communicate via memory and must be
; treated conservatively. Taken from https://reviews.llvm.org/D115302.
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"
; @s is an internal addrspace(3) global (NVPTX shared memory) that is
; written before and re-read after each barrier below.
@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
; The barrier intrinsics below are declared without `nosync` (attrs #0 is
; only `convergent nounwind`), so GVN must keep all three instructions:
; no store-to-load forwarding of %0 across the synchronizing call.
; CHECK-LABEL: @bar_sync
; CHECK: store
; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
; CHECK: load
define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
tail call void @llvm.nvvm.bar.sync(i32 0)
%2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
ret i32 %2
}
declare void @llvm.nvvm.bar.sync(i32) #0
; Same check for the legacy barrier intrinsic: the load after the call
; must survive GVN.
; CHECK-LABEL: @barrier0
; CHECK: store
; CHECK: tail call void @llvm.nvvm.barrier0()
; CHECK: load
define dso_local i32 @barrier0(i32 %0) local_unnamed_addr {
store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
tail call void @llvm.nvvm.barrier0()
%2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
ret i32 %2
}
declare void @llvm.nvvm.barrier0() #0
; Deliberately no `nosync`/`nocallback` here — that is what forces
; GlobalsAA to stay conservative about @s.
attributes #0 = { convergent nounwind }

View File

@ -1,4 +1,4 @@
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @main()
define dso_local i32 @main() {
entry:
%tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
%tmp0 = call i8* @llvm.stacksave() #1
%tmp6 = load i8, i8* @deallocCalled, align 1
%tobool = icmp ne i8 %tmp6, 0
br i1 %tobool, label %if.else, label %if.end
@ -18,10 +18,10 @@ if.else: ; preds = %entry
unreachable
; CHECK-LABEL: if.end:
; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
; CHECK-NEXT: call void @llvm.stackrestore
; CHECK-NOT: load i8, i8* @deallocCalled
if.end: ; preds = %entry
call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
call void @llvm.stackrestore(i8* %tmp0)
%tmp7 = load i8, i8* @deallocCalled, align 1
%tobool3 = icmp ne i8 %tmp7, 0
br i1 %tobool3, label %if.end6, label %if.else5
@ -35,10 +35,10 @@ if.end6: ; preds = %if.end
ret i32 0
}
declare i8* @llvm.objc.autoreleasePoolPush() #1
declare void @llvm.objc.autoreleasePoolPop(i8*) #1
declare i8* @llvm.stacksave() #1
declare void @llvm.stackrestore(i8*) #1
declare dso_local void @__assert_fail() #0
attributes #0 = { noreturn nounwind }
attributes #1 = { nounwind }
attributes #0 = { noreturn nosync nounwind }
attributes #1 = { nosync nounwind }

View File

@ -1,4 +1,4 @@
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@ -14,7 +14,7 @@ entry:
; CHECK-LABEL: @main()
define dso_local i32 @main() {
entry:
%tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
%tmp0 = call i8* @llvm.stacksave() #1
%tmp6 = load i8, i8* @deallocCalled, align 1
%tobool = icmp ne i8 %tmp6, 0
br i1 %tobool, label %if.else, label %if.end
@ -24,10 +24,10 @@ if.else: ; preds = %entry
unreachable
; CHECK-LABEL: if.end:
; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
; CHECK-NEXT: call void @llvm.stackrestore
; CHECK-NOT: load i8, i8* @deallocCalled
if.end: ; preds = %entry
call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
call void @llvm.stackrestore(i8* %tmp0)
%tmp7 = load i8, i8* @deallocCalled, align 1
%tobool3 = icmp ne i8 %tmp7, 0
br i1 %tobool3, label %if.end6, label %if.else5
@ -41,10 +41,10 @@ if.end6: ; preds = %if.end
ret i32 0
}
declare i8* @llvm.objc.autoreleasePoolPush() #1
declare void @llvm.objc.autoreleasePoolPop(i8*) #1
declare i8* @llvm.stacksave() #1
declare void @llvm.stackrestore(i8*) #1
declare dso_local void @__assert_fail() #0
attributes #0 = { noreturn nounwind }
attributes #1 = { nounwind }
attributes #0 = { noreturn nosync nounwind }
attributes #1 = { nosync nounwind }

View File

@ -18,7 +18,7 @@ entry:
; CHECK-LABEL: @main()
define dso_local i32 @main() {
entry:
%tmp0 = call i8* @llvm.objc.autoreleasePoolPush() #1
%tmp0 = call i8* @llvm.stacksave() #1
%tmp6 = load i8, i8* @deallocCalled, align 1
%tobool = icmp ne i8 %tmp6, 0
br i1 %tobool, label %if.else, label %if.end
@ -28,9 +28,9 @@ if.else: ; preds = %entry
unreachable
; CHECK-LABEL: if.end:
; CHECK-NEXT: call void @llvm.objc.autoreleasePoolPop
; CHECK-NEXT: call void @llvm.stackrestore
if.end: ; preds = %entry
call void @llvm.objc.autoreleasePoolPop(i8* %tmp0)
call void @llvm.stackrestore(i8* %tmp0)
%tmp7 = load i8, i8* @deallocCalled, align 1
%tobool3 = icmp ne i8 %tmp7, 0
br i1 %tobool3, label %if.end6, label %if.else5
@ -44,8 +44,8 @@ if.end6: ; preds = %if.end
ret i32 0
}
declare i8* @llvm.objc.autoreleasePoolPush() #1
declare void @llvm.objc.autoreleasePoolPop(i8*) #1
declare i8* @llvm.stacksave() #1
declare void @llvm.stackrestore(i8*) #1
declare dso_local void @__assert_fail() #0
attributes #0 = { noreturn nounwind }

View File

@ -0,0 +1,133 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s
; Make sure we do not hoist the load before the intrinsic, unknown function, or
; optnone function except if we know the unknown function is nosync and nocallback.
; One internal global per test function so the cases cannot interfere.
@G1 = internal global i32 undef
@G2 = internal global i32 undef
@G3 = internal global i32 undef
@G4 = internal global i32 undef
; Barrier intrinsic without nosync/nocallback: the load of @G1 must stay
; after the call (CHECK shows load following the barrier, not a PHI).
define void @test_barrier(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_barrier
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G1, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G1, align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
br i1 %c, label %init, label %check
init:
store i32 0, ptr @G1
br label %check
check:
call void @llvm.amdgcn.s.barrier()
%v = load i32, ptr @G1
%cmp = icmp eq i32 %v, 0
call void @llvm.assume(i1 %cmp)
ret void
}
; Completely unannotated external function: same conservative treatment,
; the load of @G2 stays after the call.
define void @test_unknown(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_unknown
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G2, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: call void @unknown()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G2, align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
br i1 %c, label %init, label %check
init:
store i32 0, ptr @G2
br label %check
check:
call void @unknown()
%v = load i32, ptr @G2
%cmp = icmp eq i32 %v, 0
call void @llvm.assume(i1 %cmp)
ret void
}
; A *defined* function is treated conservatively even though it carries
; nosync+nocallback, because it is optnone (see the MaySyncOrCallIntoModule
; change): the load of @G3 is not hoisted.
define void @test_optnone(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_optnone
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G3, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: call void @optnone()
; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G3, align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
br i1 %c, label %init, label %check
init:
store i32 0, ptr @G3
br label %check
check:
call void @optnone()
%v = load i32, ptr @G3
%cmp = icmp eq i32 %v, 0
call void @llvm.assume(i1 %cmp)
ret void
}
define void @optnone() optnone nosync nocallback noinline {
; CHECK: Function Attrs: nocallback noinline nosync optnone
; CHECK-LABEL: define {{[^@]+}}@optnone
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret void
;
ret void
}
; Here hoisting is legal and we use it to verify it will happen.
; The declaration is nosync+nocallback, so GVN performs PRE: the CHECK
; lines show the load moved onto the critical edge and merged via a PHI.
define void @test_unknown_annotated(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated
; CHECK-SAME: (i1 [[C:%.*]]) {
; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]]
; CHECK: .check_crit_edge:
; CHECK-NEXT: [[V_PRE:%.*]] = load i32, ptr @G4, align 4
; CHECK-NEXT: br label [[CHECK:%.*]]
; CHECK: init:
; CHECK-NEXT: store i32 0, ptr @G4, align 4
; CHECK-NEXT: br label [[CHECK]]
; CHECK: check:
; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ]
; CHECK-NEXT: call void @unknown_nosync_nocallback()
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP]], 0
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT: ret void
;
br i1 %c, label %init, label %check
init:
store i32 0, ptr @G4
br label %check
check:
call void @unknown_nosync_nocallback()
%v = load i32, ptr @G4
%cmp = icmp eq i32 %v, 0
call void @llvm.assume(i1 %cmp)
ret void
}
declare void @unknown()
declare void @unknown_nosync_nocallback() nosync nocallback
declare void @llvm.amdgcn.s.barrier()
declare void @llvm.assume(i1 noundef)