; RUN: opt -S -debugify -basicaa -gvn < %s | FileCheck %s
@a = external constant i32
; We can value forward across the fence since we can (semantically)
; reorder the following load before the fence.
define i32 @test(i32* %addr.i) {
; CHECK-LABEL: @test
; CHECK: store
; CHECK: fence
; CHECK-NOT: load
; CHECK: ret
store i32 5, i32* %addr.i, align 4
fence release
%a = load i32, i32* %addr.i, align 4
ret i32 %a
}
; Same as above
define i32 @test2(i32* %addr.i) {
; CHECK-LABEL: @test2
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* %addr.i, metadata [[var_a:![0-9]+]], metadata !DIExpression(DW_OP_deref))
; CHECK-NEXT: fence
; CHECK-NOT: load
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* %addr.i, metadata [[var_a2:![0-9]+]], metadata !DIExpression(DW_OP_deref))
; CHECK: ret
%a = load i32, i32* %addr.i, align 4
fence release
%a2 = load i32, i32* %addr.i, align 4
%res = sub i32 %a, %a2
ret i32 %res
}
; We cannot value forward across an acquire barrier since we might
; be synchronizing with another thread storing to the same variable
; followed by a release fence. This is not so much enforcing an
; ordering property (though it is that too) as a liveness
; property: we expect to eventually see the value stored by
; another thread when spinning on that location.
define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
; CHECK-LABEL: @test3
; CHECK: load
; CHECK: fence
; CHECK: load
; CHECK: ret i32 %res
; The following code is intended to model the unrolling of
; two iterations in a spin loop of the form:
;   do { fence acquire; tmp = *%addr.i; } while (!tmp);
; It's hopefully clear that allowing PRE to turn this into
;   if (!*%addr.i) while (true) {}
; would be unfortunate.
fence acquire
%a = load i32, i32* %addr.i, align 4
fence acquire
%a2 = load i32, i32* %addr.i, align 4
%res = sub i32 %a, %a2
ret i32 %res
}
; We can value forward the load across both fences,
; because the load is from a constant memory location.
define i32 @test4(i32* %addr) {
; CHECK-LABEL: @test4
; CHECK-NOT: load
; CHECK: fence release
; CHECK: store
; CHECK: fence seq_cst
; CHECK: ret i32 0
%var = load i32, i32* @a
fence release
store i32 42, i32* %addr, align 8
fence seq_cst
%var2 = load i32, i32* @a
%var3 = sub i32 %var, %var2
ret i32 %var3
}
; Another example of why forwarding across an acquire fence is problematic
; can be seen in a normal locking operation. Say we had:
; *p = 5; unlock(l); lock(l); use(p);
; forwarding the store to p would be invalid. A reasonable implementation
; of unlock and lock might be:
; unlock() { atomicrmw sub %l, 1 monotonic; fence release }
; lock() {
; do {
; %res = cmpxchg %p, 0, 1, monotonic monotonic
; } while(!%res.success)
; fence acquire;
; }
; Given we chose to forward across the release fence, we clearly can't forward
; across the acquire fence as well.
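; Below is a minimal, illustrative IR sketch of that unlock/lock pattern.
; It is not exercised by any FileCheck directives; the function and value
; names (@lock_handoff_sketch, %l, %p) are hypothetical, and monotonic is
; used instead of unordered because atomicrmw requires at least monotonic
; ordering.
define i32 @lock_handoff_sketch(i32* %l, i32* %p) {
entry:
  store i32 5, i32* %p, align 4
  ; unlock: drop the lock word, then publish with a release fence
  %old = atomicrmw sub i32* %l, i32 1 monotonic
  fence release
  br label %spin

spin:
  ; lock: spin on a monotonic cmpxchg, then order with an acquire fence
  %pair = cmpxchg i32* %l, i32 0, i32 1 monotonic monotonic
  %success = extractvalue { i32, i1 } %pair, 1
  br i1 %success, label %locked, label %spin

locked:
  fence acquire
  ; this load must not be value forwarded from the store in %entry
  %use = load i32, i32* %p, align 4
  ret i32 %use
}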
; CHECK: [[var_a]] = !DILocalVariable
; CHECK-NEXT: [[var_a2]] = !DILocalVariable