; RUN: opt -S -debugify -basicaa -gvn < %s | FileCheck %s
@a = external constant i32
; We can value forward across the fence since we can (semantically)
; reorder the following load before the fence.
define i32 @test(i32* %addr.i) {
; CHECK-LABEL: @test
; CHECK: store
; CHECK: fence
; CHECK-NOT: load
; CHECK: ret
store i32 5, i32* %addr.i, align 4
fence release
%a = load i32, i32* %addr.i, align 4
ret i32 %a
}
; Same as above
define i32 @test2(i32* %addr.i) {
; CHECK-LABEL: @test2
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* %addr.i, metadata [[var_a:![0-9]+]], metadata !DIExpression(DW_OP_deref))
; CHECK-NEXT: fence
; CHECK-NOT: load
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* %addr.i, metadata [[var_a2:![0-9]+]], metadata !DIExpression(DW_OP_deref))
; CHECK: ret
%a = load i32, i32* %addr.i, align 4
fence release
%a2 = load i32, i32* %addr.i, align 4
%res = sub i32 %a, %a2
ret i32 %res
}
; We cannot value forward across an acquire barrier since we might
; be synchronizing with another thread storing to the same variable
; followed by a release fence. This is not so much enforcing an
; ordering property (though it is that too) as a liveness
; property: we expect to eventually see the value stored by
; another thread when spinning on that location.
define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
; CHECK-LABEL: @test3
; CHECK: load
; CHECK: fence
; CHECK: load
; CHECK: ret i32 %res
; The following code is intended to model the unrolling of
; two iterations in a spin loop of the form:
;   do { fence acquire; tmp = *%addr.i; } while (!tmp);
; It's hopefully clear that allowing PRE to turn this into
;   if (!*%addr.i) while (true) {}
; would be unfortunate.
fence acquire
%a = load i32, i32* %addr.i, align 4
fence acquire
%a2 = load i32, i32* %addr.i, align 4
%res = sub i32 %a, %a2
ret i32 %res
}
; We can value forward the load across both fences,
; because the load is from a constant memory location.
define i32 @test4(i32* %addr) {
; CHECK-LABEL: @test4
; CHECK-NOT: load
; CHECK: fence release
; CHECK: store
; CHECK: fence seq_cst
; CHECK: ret i32 0
%var = load i32, i32* @a
fence release
store i32 42, i32* %addr, align 8
fence seq_cst
%var2 = load i32, i32* @a
%var3 = sub i32 %var, %var2
ret i32 %var3
}
; Another example of why forwarding across an acquire fence is problematic
; can be seen in a normal locking operation. Say we had:
; *p = 5; unlock(l); lock(l); use(p);
; forwarding the store to p would be invalid. A reasonable implementation
; of unlock and lock might be:
; unlock() { atomicrmw sub %l, 1 monotonic; fence release }
; lock() {
; do {
; %res = cmpxchg %p, 0, 1, monotonic monotonic
; } while(!%res.success)
; fence acquire;
; }
; Given we chose to forward across the release fence, we clearly can't forward
; across the acquire fence as well.
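; Below is a minimal, illustrative IR sketch of that unlock/lock pattern.
; It is not exercised by any FileCheck directives; the function and value
; names (@lock_handoff_sketch, %l, %p) are hypothetical, and monotonic is
; used instead of unordered because atomicrmw requires at least monotonic
; ordering.
define i32 @lock_handoff_sketch(i32* %l, i32* %p) {
entry:
  store i32 5, i32* %p, align 4
  ; unlock: drop the lock word, then publish with a release fence
  %old = atomicrmw sub i32* %l, i32 1 monotonic
  fence release
  br label %spin

spin:
  ; lock: spin on a monotonic cmpxchg, then order with an acquire fence
  %pair = cmpxchg i32* %l, i32 0, i32 1 monotonic monotonic
  %success = extractvalue { i32, i1 } %pair, 1
  br i1 %success, label %locked, label %spin

locked:
  fence acquire
  ; this load must not be value forwarded from the store in %entry
  %use = load i32, i32* %p, align 4
  ret i32 %use
}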
; CHECK: [[var_a]] = !DILocalVariable
; CHECK-NEXT: [[var_a2]] = !DILocalVariable