2014-06-11 14:44:53 +08:00
|
|
|
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
|
2015-04-11 08:06:36 +08:00
|
|
|
; RUN: llc %s -O0 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
|
|
|
; RUN: llc %s -O0 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
|
[AArch64, ARM] Enable GlobalMerge with -O3 rather than -O1.
The pass used to be enabled by default with CodeGenOpt::Less (-O1).
This is too aggressive, considering the pass indiscriminately merges
all globals together.
Currently, performance doesn't always improve, and, on code that uses
few globals (e.g., the odd file- or function-static), it is more often
than not degraded by the optimization. Lengthy discussion can be found
on llvmdev (AArch64-focused; ARM has similar problems):
http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-February/082800.html
Also, it makes tooling and debuggers less useful when dealing with
globals and data sections.
GlobalMerge needs to better identify those cases that benefit, and this
will be done separately. In the meantime, move the pass to run with
-O3 rather than -O1, on both ARM and AArch64.
llvm-svn: 233024
2015-03-24 05:17:36 +08:00
|
|
|
; RUN: llc %s -O1 -o - | FileCheck -check-prefix=NO-MERGE %s
|
2015-04-11 08:06:36 +08:00
|
|
|
; RUN: llc %s -O1 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
|
|
|
; RUN: llc %s -O1 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
|
[AArch64, ARM] Enable GlobalMerge with -O3 rather than -O1.
The pass used to be enabled by default with CodeGenOpt::Less (-O1).
This is too aggressive, considering the pass indiscriminately merges
all globals together.
Currently, performance doesn't always improve, and, on code that uses
few globals (e.g., the odd file- or function-static), it is more often
than not degraded by the optimization. Lengthy discussion can be found
on llvmdev (AArch64-focused; ARM has similar problems):
http://lists.cs.uiuc.edu/pipermail/llvmdev/2015-February/082800.html
Also, it makes tooling and debuggers less useful when dealing with
globals and data sections.
GlobalMerge needs to better identify those cases that benefit, and this
will be done separately. In the meantime, move the pass to run with
-O3 rather than -O1, on both ARM and AArch64.
llvm-svn: 233024
2015-03-24 05:17:36 +08:00
|
|
|
; RUN: llc %s -O3 -o - | FileCheck -check-prefix=MERGE %s
|
2015-04-11 08:06:36 +08:00
|
|
|
; RUN: llc %s -O3 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
|
|
|
|
; RUN: llc %s -O3 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
|
2014-06-11 14:44:53 +08:00
|
|
|
|
|
|
|
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
|
|
|
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
|
|
|
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
2015-08-11 23:48:04 +08:00
|
|
|
; MERGE: .zerofill __DATA,__bss,l__MergedGlobals,60,4
|
2014-06-11 14:44:53 +08:00
|
|
|
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
|
|
|
|
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
|
|
|
|
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
|
|
|
|
|
2015-08-11 23:48:04 +08:00
|
|
|
; NO-MERGE-NOT: .zerofill __DATA,__bss,l__MergedGlobals,60,4
|
2014-06-11 14:44:53 +08:00
|
|
|
; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
|
|
|
|
; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
|
|
|
|
; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
|
2015-08-11 23:48:04 +08:00
|
|
|
; NO-MERGE-NOT: .zerofill __DATA,__bss,l__MergedGlobals,60,4
|
2014-06-11 14:44:53 +08:00
|
|
|
|
|
|
|
; Module header: the triple selects Thumb (v7) on iOS, so the output checked by
; the FileCheck directives at the top of this file uses Mach-O section syntax
; (.zerofill __DATA,__bss,...).
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
|
|
|
|
target triple = "thumbv7-apple-ios3.0.0"
|
|
|
|
|
|
|
|
; The three globals under test. Each is an internal, zero-initialized
; [5 x i32] (20 bytes), which is why the directives at the top of this file
; expect either three separate 20-byte zerofill symbols (_bar, _baz, _foo) or a
; single 60-byte l__MergedGlobals symbol.
@bar = internal global [5 x i32] zeroinitializer, align 4
|
|
|
|
@baz = internal global [5 x i32] zeroinitializer, align 4
|
|
|
|
@foo = internal global [5 x i32] zeroinitializer, align 4
|
|
|
|
|
|
|
|
; Fills @bar and @baz with values returned by @calc.
;
; Calls @calc ten times and stores the results alternately into @bar[i] and
; @baz[i] for i = 0..4 (bar[0], baz[0], bar[1], baz[1], ...). @foo is not
; touched here. Takes no arguments and returns void.
;
; @calc is declared with a varargs prototype (i32 (...)), so every call site
; goes through a constant bitcast to i32 ()*.
; Function Attrs: nounwind ssp
|
|
|
|
define internal void @initialize() #0 {
|
|
|
|
; Element 0 of @bar, then element 0 of @baz.
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %1, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
%2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %2, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
; Element 1.
%3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %3, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
%4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %4, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
; Element 2.
%5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %5, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
%6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %6, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
; Element 3.
%7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %7, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
%8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %8, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
; Element 4 (the last element of each 5-entry array).
%9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %9, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
%10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %10, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; External value source called by @initialize. It is only declared here, with
; a varargs prototype, so callers bitcast it to i32 ()* before calling.
declare i32 @calc(...) #1
|
|
|
|
|
|
|
|
; Computes @foo[i] = @baz[i] * @bar[i] for all five elements.
;
; Elements 0..3 are handled as one <4 x i32> vector operation by bitcasting
; each [5 x i32]* global to <4 x i32>*; the remaining element 4 is handled as
; a scalar. Takes no arguments and returns void.
; Function Attrs: nounwind ssp
|
|
|
|
define internal void @calculate() #0 {
|
2015-02-28 05:17:42 +08:00
|
|
|
; Vector part: load the first four i32s of @bar and @baz (align 4, i.e. only
; element alignment is promised for the vector access).
%1 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
|
|
|
|
%2 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
|
2014-06-11 14:44:53 +08:00
|
|
|
%3 = mul <4 x i32> %2, %1
|
|
|
|
store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
|
2015-03-14 02:20:45 +08:00
|
|
|
; Scalar tail: element 4 of each array. Unlike the vector multiply above,
; this multiply carries the nsw flag.
%4 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
|
|
|
|
%5 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
%6 = mul nsw i32 %5, %4
|
2015-03-14 02:20:45 +08:00
|
|
|
store i32 %6, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
|
2014-06-11 14:44:53 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Returns a pointer to the first element of @foo (i32* to @foo[0]). It reads
; and writes no memory itself; it only forms the address.
; Function Attrs: nounwind readnone ssp
|
|
|
|
define internal i32* @returnFoo() #2 {
|
2015-03-14 02:20:45 +08:00
|
|
|
ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i32 0, i32 0)
|
2014-06-11 14:44:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
; Attribute groups referenced by the definitions above:
;   #0 - @initialize and @calculate (nounwind ssp + frame-pointer/FP-math strings)
;   #1 - the @calc declaration (same string attributes, without nounwind/ssp)
;   #2 - @returnFoo (as #0, plus readnone)
;   #3 - the individual @calc call sites in @initialize (nounwind only)
attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
|
|
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
|
|
attributes #2 = { nounwind readnone ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
|
|
|
attributes #3 = { nounwind }
|
|
|
|
|
|
|
|
!llvm.ident = !{!0}
|
|
|
|
|
IR: Make metadata typeless in assembly
Now that `Metadata` is typeless, reflect that in the assembly. These
are the matching assembly changes for the metadata/value split in
r223802.
- Only use the `metadata` type when referencing metadata from a call
intrinsic -- i.e., only when it's used as a `Value`.
- Stop pretending that `ValueAsMetadata` is wrapped in an `MDNode`
when referencing it from call intrinsics.
So, assembly like this:
define @foo(i32 %v) {
call void @llvm.foo(metadata !{i32 %v}, metadata !0)
call void @llvm.foo(metadata !{i32 7}, metadata !0)
call void @llvm.foo(metadata !1, metadata !0)
call void @llvm.foo(metadata !3, metadata !0)
call void @llvm.foo(metadata !{metadata !3}, metadata !0)
ret void, !bar !2
}
!0 = metadata !{metadata !2}
!1 = metadata !{i32* @global}
!2 = metadata !{metadata !3}
!3 = metadata !{}
turns into this:
define @foo(i32 %v) {
call void @llvm.foo(metadata i32 %v, metadata !0)
call void @llvm.foo(metadata i32 7, metadata !0)
call void @llvm.foo(metadata i32* @global, metadata !0)
call void @llvm.foo(metadata !3, metadata !0)
call void @llvm.foo(metadata !{!3}, metadata !0)
ret void, !bar !2
}
!0 = !{!2}
!1 = !{i32* @global}
!2 = !{!3}
!3 = !{}
I wrote an upgrade script that handled almost all of the tests in llvm
and many of the tests in cfe (even handling many `CHECK` lines). I've
attached it (or will attach it in a moment if you're speedy) to PR21532
to help everyone update their out-of-tree testcases.
This is part of PR21532.
llvm-svn: 224257
2014-12-16 03:07:53 +08:00
|
|
|
; Metadata nodes:
;   !0 - producer string, referenced from !llvm.ident
;   !1 - the TBAA access tag attached (as !tbaa !1) to the scalar i32 loads and
;        stores above; base and access type are both !2 at offset 0
;   !2 -> !3 -> !4 - the TBAA type chain "int" -> "omnipotent char" -> root
!0 = !{!"LLVM version 3.4 "}
|
|
|
|
!1 = !{!2, !2, i64 0}
|
|
|
|
!2 = !{!"int", !3, i64 0}
|
|
|
|
!3 = !{!"omnipotent char", !4, i64 0}
|
|
|
|
!4 = !{!"Simple C/C++ TBAA"}
|