2014-05-30 18:09:59 +08:00
|
|
|
; Test driver lines. llc is run four times on this file: for thumbv8 and for
; thumbv7 with -arm-restrict-it, each once with the default relocation model
; and once with -relocation-model=pic. Every invocation passes
; -arm-atomic-cfg-tidy=0. The two non-PIC runs are verified with FileCheck's
; default CHECK prefix; the two PIC runs are verified with the CHECK-PIC prefix.
; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 | FileCheck %s
|
|
|
|
; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it | FileCheck %s
|
|
|
|
; RUN: llc < %s -mtriple=thumbv8 -arm-atomic-cfg-tidy=0 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
|
|
|
|
; RUN: llc < %s -mtriple=thumbv7 -arm-atomic-cfg-tidy=0 -arm-restrict-it -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
|
2013-09-09 22:21:49 +08:00
|
|
|
|
|
|
|
; Types and external globals used by @test.
;
; %struct.FF is a record of six function pointers. @Get returns a pointer to
; one, and @test stores that pointer into @FuncPtr.
%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
|
|
|
|
; NOTE(review): %struct.BD is not referenced by any other line visible in this
; file; it looks like a leftover from the reduced original - confirm before
; removing.
%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
|
|
|
|
|
|
|
|
; Written in %bb1 of @test with the value returned by @Get.
@FuncPtr = external hidden unnamed_addr global %struct.FF*
|
|
|
|
; Passed (as a pointer to its first byte) as the first argument of @foo in %bb1.
@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
|
|
|
|
; Loaded in %entry (argument to @bar) and again in %bb (compared against 1).
@G = external unnamed_addr global i32
|
|
|
|
; NOTE(review): @.str2 and @.str3 are declared but not referenced by any line
; visible in this file.
@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
|
|
|
|
@.str3 = external hidden unnamed_addr constant [58 x i8], align 4
|
|
|
|
|
|
|
|
; Function under test. It calls @bar with the value loaded from @G and
; dispatches on the result with a three-case switch (1, 536870913, 536870914);
; any other value branches to %bb8. The function returns 0 from %bb4, 1 from
; %bb6 and -1 from %bb8. The FileCheck directives embedded in the body are
; matched, in file order, against the assembly llc emits for this function;
; their position relative to the IR instructions does not affect matching.
define i32 @test() nounwind optsize ssp {
|
|
|
|
entry:
|
|
|
|
; CHECK-LABEL: test:
|
|
|
|
; Expect one push, and no second push before the next positive match below.
; CHECK: push
|
|
|
|
; CHECK-NOT: push
|
|
|
|
; Three i32 stack slots. Only %index_cache is stored to in this function;
; %block_size and %block_count are read in %bb1 with no store visible here.
%block_size = alloca i32, align 4
|
|
|
|
%block_count = alloca i32, align 4
|
|
|
|
%index_cache = alloca i32, align 4
|
|
|
|
store i32 0, i32* %index_cache, align 4
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp = load i32, i32* @G, align 4
|
2013-09-09 22:21:49 +08:00
|
|
|
%tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
|
|
|
|
; Dispatch on the result of @bar; the default destination is %bb8.
switch i32 %tmp1, label %bb8 [
|
2015-04-24 04:31:30 +08:00
|
|
|
i32 1, label %bb
|
2013-09-09 22:21:49 +08:00
|
|
|
i32 536870913, label %bb4
|
|
|
|
i32 536870914, label %bb6
|
|
|
|
]
|
|
|
|
|
|
|
|
; Switch case 1: reload @G and continue to %bb1 only when it equals 1;
; otherwise branch to %bb8.
bb:
|
2015-02-28 05:17:42 +08:00
|
|
|
%tmp2 = load i32, i32* @G, align 4
|
2015-04-24 04:31:30 +08:00
|
|
|
%tmp4 = icmp eq i32 %tmp2, 1
|
2013-09-09 22:21:49 +08:00
|
|
|
br i1 %tmp4, label %bb1, label %bb8
|
|
|
|
|
|
|
|
bb1:
|
Distribute the weight on the edge from switch to default statement to edges generated in lowering switch.
Currently, when edge weights are assigned to edges that are created when lowering switch statement, the weight on the edge to default statement (let's call it "default weight" here) is not considered. We need to distribute this weight properly. However, without value profiling, we have no idea how to distribute it. In this patch, I applied the heuristic that this weight is evenly distributed to successors.
For example, given a switch statement with cases 1,2,3,5,10,11,20, and every edge from switch to each successor has weight 10. If there is a binary search tree built to test if n < 10, then its two out-edges will have weight 4x10+10/2 = 45 and 3x10 + 10/2 = 35 respectively (currently they are 40 and 30 without considering the default weight). Each distribution (which is 5 here) will be stored in each SwitchWorkListItem for further distribution.
There are some exceptions:
For a jump table header which doesn't have any edge to default statement, we don't distribute the default weight to it.
For a bit test header which covers a contiguous range and hence has no edges to default statement, we don't distribute the default weight to it.
When the branch checks a single value or a contiguous range with no edge to default statement, we don't distribute the default weight to it.
In other cases, the default weight is evenly distributed to successors.
Differential Revision: http://reviews.llvm.org/D12418
llvm-svn: 246522
2015-09-01 09:42:16 +08:00
|
|
|
; Non-PIC expectations: after the %entry block marker, the output must contain
; "it eq", "ldreq", "it eq", "cmpeq" on four consecutive lines, followed later
; by the %bb1 block marker.
; NOTE(review): this sequence is presumably the predicated form of the
; load/compare in %bb - confirm against actual llc output.
; CHECK: %entry
|
2013-09-09 22:21:49 +08:00
|
|
|
; CHECK: it eq
|
|
|
|
; CHECK-NEXT: ldreq
|
|
|
|
; CHECK-NEXT: it eq
|
|
|
|
; CHECK-NEXT: cmpeq
|
|
|
|
; CHECK: %bb1
|
2015-02-28 05:17:42 +08:00
|
|
|
; Read the two stack slots, fetch a %struct.FF* from @Get and publish it in
; @FuncPtr, then call @foo with @.str1, the 64-bit product of the two
; zero-extended slot values, and the %block_size value.
%tmp5 = load i32, i32* %block_size, align 4
|
|
|
|
%tmp6 = load i32, i32* %block_count, align 4
|
2013-09-09 22:21:49 +08:00
|
|
|
%tmp7 = call %struct.FF* @Get() nounwind
|
|
|
|
store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
|
|
|
|
%tmp10 = zext i32 %tmp6 to i64
|
|
|
|
%tmp11 = zext i32 %tmp5 to i64
|
|
|
|
%tmp12 = mul nsw i64 %tmp10, %tmp11
|
2015-03-14 02:20:45 +08:00
|
|
|
%tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
|
2013-09-09 22:21:49 +08:00
|
|
|
br label %bb8
|
|
|
|
|
|
|
|
; Switch case 536870913: returns 0. The PIC-run expectations are written inside
; this block: three "cmp" matches, then "it eq", "ldreq", "it eq", "cmpeq",
; "beq" on consecutive lines, then the %bb6 block marker and a later "mov".
bb4:
|
|
|
|
; CHECK-PIC: cmp
|
|
|
|
; CHECK-PIC: cmp
|
Distribute the weight on the edge from switch to default statement to edges generated in lowering switch.
Currently, when edge weights are assigned to edges that are created when lowering switch statement, the weight on the edge to default statement (let's call it "default weight" here) is not considered. We need to distribute this weight properly. However, without value profiling, we have no idea how to distribute it. In this patch, I applied the heuristic that this weight is evenly distributed to successors.
For example, given a switch statement with cases 1,2,3,5,10,11,20, and every edge from switch to each successor has weight 10. If there is a binary search tree built to test if n < 10, then its two out-edges will have weight 4x10+10/2 = 45 and 3x10 + 10/2 = 35 respectively (currently they are 40 and 30 without considering the default weight). Each distribution (which is 5 here) will be stored in each SwitchWorkListItem for further distribution.
There are some exceptions:
For a jump table header which doesn't have any edge to default statement, we don't distribute the default weight to it.
For a bit test header which covers a contiguous range and hence has no edges to default statement, we don't distribute the default weight to it.
When the branch checks a single value or a contiguous range with no edge to default statement, we don't distribute the default weight to it.
In other cases, the default weight is evenly distributed to successors.
Differential Revision: http://reviews.llvm.org/D12418
llvm-svn: 246522
2015-09-01 09:42:16 +08:00
|
|
|
; CHECK-PIC: cmp
|
2017-11-14 04:45:38 +08:00
|
|
|
; CHECK-PIC: it eq
|
|
|
|
; CHECK-PIC-NEXT: ldreq
|
|
|
|
; CHECK-PIC-NEXT: it eq
|
|
|
|
; CHECK-PIC-NEXT: cmpeq
|
|
|
|
; CHECK-PIC-NEXT: beq
|
Distribute the weight on the edge from switch to default statement to edges generated in lowering switch.
Currently, when edge weights are assigned to edges that are created when lowering switch statement, the weight on the edge to default statement (let's call it "default weight" here) is not considered. We need to distribute this weight properly. However, without value profiling, we have no idea how to distribute it. In this patch, I applied the heuristic that this weight is evenly distributed to successors.
For example, given a switch statement with cases 1,2,3,5,10,11,20, and every edge from switch to each successor has weight 10. If there is a binary search tree built to test if n < 10, then its two out-edges will have weight 4x10+10/2 = 45 and 3x10 + 10/2 = 35 respectively (currently they are 40 and 30 without considering the default weight). Each distribution (which is 5 here) will be stored in each SwitchWorkListItem for further distribution.
There are some exceptions:
For a jump table header which doesn't have any edge to default statement, we don't distribute the default weight to it.
For a bit test header which covers a contiguous range and hence has no edges to default statement, we don't distribute the default weight to it.
When the branch checks a single value or a contiguous range with no edge to default statement, we don't distribute the default weight to it.
In other cases, the default weight is evenly distributed to successors.
Differential Revision: http://reviews.llvm.org/D12418
llvm-svn: 246522
2015-09-01 09:42:16 +08:00
|
|
|
; CHECK-PIC: %bb6
|
[Codegen] Merge tail blocks with no successors after block placement
Summary:
I found the following case having tail blocks with no successors merging opportunities after block placement.
Before block placement:
bb0:
...
bne a0, 0, bb2:
bb1:
mv a0, 1
ret
bb2:
...
bb3:
mv a0, 1
ret
bb4:
mv a0, -1
ret
The conditional branch bne in bb0 is opposite to beq.
After block placement:
bb0:
...
beq a0, 0, bb1
bb2:
...
bb4:
mv a0, -1
ret
bb1:
mv a0, 1
ret
bb3:
mv a0, 1
ret
After block placement, that appears new tail merging opportunity, bb1 and bb3 can be merged as one block. So the conditional constraint for merging tail blocks with no successors should be removed. In my experiment for RISC-V, it decreases code size.
Author of original patch: Jim Lin
Reviewers: haicheng, aheejin, craig.topper, rnk, RKSimon, Jim, dmgreen
Reviewed By: Jim, dmgreen
Subscribers: xbolva00, dschuff, javed.absar, sbc100, jgravelle-google, aheejin, kito-cheng, dmgreen, PkmX, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D54411
llvm-svn: 363284
2019-06-14 02:11:32 +08:00
|
|
|
; CHECK-PIC: mov
|
2013-09-09 22:21:49 +08:00
|
|
|
ret i32 0
|
|
|
|
|
|
|
|
; Switch case 536870914: returns 1.
bb6:
|
|
|
|
ret i32 1
|
|
|
|
|
|
|
|
; Switch default, the false edge of %bb, and the exit of %bb1: returns -1.
bb8:
|
|
|
|
ret i32 -1
|
|
|
|
}
|
|
|
|
|
|
|
|
; External functions. Bodies are not part of this file, so only their
; signatures are known here.
;
; NOTE(review): @printf is declared but no call to it is visible in this file.
declare i32 @printf(i8*, ...)
|
|
|
|
|
|
|
|
; Called in %bb1 of @test; its result is stored into @FuncPtr.
declare %struct.FF* @Get()
|
|
|
|
|
|
|
|
; Called in %bb1 of @test with @.str1, a 64-bit product and an i32; the result
; is unused there.
declare i32 @foo(i8*, i64, i32)
|
|
|
|
|
|
|
|
; Called in %entry of @test as @bar(0, 0, <value of @G>); its result drives the
; switch.
declare i32 @bar(i32, i32, i32)
|