2015-02-28 02:32:11 +08:00
|
|
|
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
|
[CodeGenPrepare] Split branch conditions into multiple conditional branches.
This optimization transforms code like:
bb1:
%0 = icmp ne i32 %a, 0
%1 = icmp ne i32 %b, 0
%or.cond = or i1 %0, %1
br i1 %or.cond, label %TrueBB, label %FalseBB
into multiple branch instructions like:
bb1:
%0 = icmp ne i32 %a, 0
br i1 %0, label %TrueBB, label %bb2
bb2:
%1 = icmp ne i32 %b, 0
br i1 %1, label %TrueBB, label %FalseBB
This optimization is already performed by SelectionDAG, but not by FastISel.
FastISel cannot perform this optimization, because it cannot generate new
MachineBasicBlocks.
Performing this optimization at CodeGenPrepare time makes it available to both -
SelectionDAG and FastISel - and the implementation in SelectionDAG could be
removed. There are currently a few differences in codegen for X86 and PPC, so
this commit only enables it for FastISel.
Reviewed by Jim Grosbach
This fixes rdar://problem/19034919.
llvm-svn: 223786
2014-12-10 00:36:13 +08:00
|
|
|
|
2015-08-26 02:12:40 +08:00
|
|
|
; CHECK-LABEL: test_or
|
[CodeGenPrepare] Split branch conditions into multiple conditional branches.
This optimization transforms code like:
bb1:
%0 = icmp ne i32 %a, 0
%1 = icmp ne i32 %b, 0
%or.cond = or i1 %0, %1
br i1 %or.cond, label %TrueBB, label %FalseBB
into multiple branch instructions like:
bb1:
%0 = icmp ne i32 %a, 0
br i1 %0, label %TrueBB, label %bb2
bb2:
%1 = icmp ne i32 %b, 0
br i1 %1, label %TrueBB, label %FalseBB
This optimization is already performed by SelectionDAG, but not by FastISel.
FastISel cannot perform this optimization, because it cannot generate new
MachineBasicBlocks.
Performing this optimization at CodeGenPrepare time makes it available to both -
SelectionDAG and FastISel - and the implementation in SelectionDAG could be
removed. There are currently a few differences in codegen for X86 and PPC, so
this commit only enables it for FastISel.
Reviewed by Jim Grosbach
This fixes rdar://problem/19034919.
llvm-svn: 223786
2014-12-10 00:36:13 +08:00
|
|
|
; CHECK: cbnz w0, {{LBB[0-9]+_2}}
|
|
|
|
; CHECK: cbz w1, {{LBB[0-9]+_1}}
|
|
|
|
; test_or: takes two i32 arguments and returns i64. Control reaches %bb3
; (returning 0) when %a == 0 or %b == 0; otherwise it reaches %bb4, which
; returns the result of calling @bar.
define i64 @test_or(i32 %a, i32 %b) {
|
|
|
|
bb1:
|
|
|
|
%0 = icmp eq i32 %a, 0
|
|
|
|
%1 = icmp eq i32 %b, 0
|
|
|
|
%or.cond = or i1 %0, %1
|
|
|
|
; Single conditional branch on the combined `or` condition, annotated with the
; branch-weight profile metadata node !0.
br i1 %or.cond, label %bb3, label %bb4, !prof !0
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret i64 0
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
%2 = call i64 @bar()
|
|
|
|
ret i64 %2
|
|
|
|
}
|
|
|
|
|
2015-08-26 02:12:40 +08:00
|
|
|
; CHECK-LABEL: test_and
|
Revive http://reviews.llvm.org/D12778 to handle forward-hot-prob and backward-hot-prob consistently.
Summary:
Consider the following diamond CFG:
A
/ \
B C
\/
D
Suppose A->B and A->C have probabilities 81% and 19%. In block-placement, A->B is called a hot edge and the final placement should be ABDC. However, the current implementation outputs ABCD. This is because when choosing the next block of B, it checks if Freq(C->D) > Freq(B->D) * 20%, which is true (if Freq(A) = 100, then Freq(B->D) = 81, Freq(C->D) = 19, and 19 > 81*20%=16.2). Actually, we should use 25% instead of 20% as the probability here, so that we have 19 < 81*25%=20.25, and the desired ABDC layout will be generated.
Reviewers: djasper, davidxl
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D20989
llvm-svn: 272203
2016-06-09 05:30:12 +08:00
|
|
|
; CHECK: cbnz w0, {{LBB[0-9]+_2}}
|
|
|
|
; CHECK: cbz w1, {{LBB[0-9]+_1}}
|
[CodeGenPrepare] Split branch conditions into multiple conditional branches.
This optimization transforms code like:
bb1:
%0 = icmp ne i32 %a, 0
%1 = icmp ne i32 %b, 0
%or.cond = or i1 %0, %1
br i1 %or.cond, label %TrueBB, label %FalseBB
into multiple branch instructions like:
bb1:
%0 = icmp ne i32 %a, 0
br i1 %0, label %TrueBB, label %bb2
bb2:
%1 = icmp ne i32 %b, 0
br i1 %1, label %TrueBB, label %FalseBB
This optimization is already performed by SelectionDAG, but not by FastISel.
FastISel cannot perform this optimization, because it cannot generate new
MachineBasicBlocks.
Performing this optimization at CodeGenPrepare time makes it available to both -
SelectionDAG and FastISel - and the implementation in SelectionDAG could be
removed. There are currently a few differences in codegen for X86 and PPC, so
this commit only enables it for FastISel.
Reviewed by Jim Grosbach
This fixes rdar://problem/19034919.
llvm-svn: 223786
2014-12-10 00:36:13 +08:00
|
|
|
; test_and: takes two i32 arguments and returns i64. Control reaches %bb4
; (returning the result of calling @bar) only when both %a != 0 and %b != 0;
; otherwise it reaches %bb3, which returns 0.
define i64 @test_and(i32 %a, i32 %b) {
|
|
|
|
bb1:
|
|
|
|
%0 = icmp ne i32 %a, 0
|
|
|
|
%1 = icmp ne i32 %b, 0
|
|
|
|
%or.cond = and i1 %0, %1
|
|
|
|
; Single conditional branch on the combined `and` condition, annotated with
; the branch-weight profile metadata node !1. Note the true target is %bb4.
br i1 %or.cond, label %bb4, label %bb3, !prof !1
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret i64 0
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
%2 = call i64 @bar()
|
|
|
|
ret i64 %2
|
|
|
|
}
|
|
|
|
|
2015-09-03 03:23:23 +08:00
|
|
|
; If the branch is unpredictable, don't add another branch.
|
|
|
|
|
|
|
|
; CHECK-LABEL: test_or_unpredictable
|
|
|
|
; CHECK: cmp w0, #0
|
|
|
|
; CHECK-NEXT: cset w8, eq
|
|
|
|
; CHECK-NEXT: cmp w1, #0
|
|
|
|
; CHECK-NEXT: cset w9, eq
|
|
|
|
; CHECK-NEXT: orr w8, w8, w9
|
2015-12-04 01:19:58 +08:00
|
|
|
; CHECK-NEXT: tbnz w8, #0,
|
2015-09-03 03:23:23 +08:00
|
|
|
; test_or_unpredictable: same IR shape as an `or` of two `icmp eq ..., 0`
; comparisons feeding one conditional branch, but the branch carries
; !unpredictable metadata instead of a branch-weight profile. Returns 0 via
; %bb3 when %a == 0 or %b == 0; otherwise returns the result of @bar via %bb4.
define i64 @test_or_unpredictable(i32 %a, i32 %b) {
|
|
|
|
bb1:
|
|
|
|
%0 = icmp eq i32 %a, 0
|
|
|
|
%1 = icmp eq i32 %b, 0
|
|
|
|
%or.cond = or i1 %0, %1
|
|
|
|
; The branch is tagged !unpredictable (metadata node !2).
br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret i64 0
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
%2 = call i64 @bar()
|
|
|
|
ret i64 %2
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: test_and_unpredictable
|
|
|
|
; CHECK: cmp w0, #0
|
|
|
|
; CHECK-NEXT: cset w8, ne
|
|
|
|
; CHECK-NEXT: cmp w1, #0
|
|
|
|
; CHECK-NEXT: cset w9, ne
|
|
|
|
; CHECK-NEXT: and w8, w8, w9
|
2015-12-04 01:19:58 +08:00
|
|
|
; CHECK-NEXT: tbz w8, #0,
|
2015-09-03 03:23:23 +08:00
|
|
|
; test_and_unpredictable: an `and` of two `icmp ne ..., 0` comparisons feeding
; one conditional branch that carries !unpredictable metadata. Returns the
; result of @bar via %bb4 only when both %a != 0 and %b != 0; otherwise
; returns 0 via %bb3.
define i64 @test_and_unpredictable(i32 %a, i32 %b) {
|
|
|
|
bb1:
|
|
|
|
%0 = icmp ne i32 %a, 0
|
|
|
|
%1 = icmp ne i32 %b, 0
|
|
|
|
%or.cond = and i1 %0, %1
|
|
|
|
; The branch is tagged !unpredictable (metadata node !2); true target is %bb4.
br i1 %or.cond, label %bb4, label %bb3, !unpredictable !2
|
|
|
|
|
|
|
|
bb3:
|
|
|
|
ret i64 0
|
|
|
|
|
|
|
|
bb4:
|
|
|
|
%2 = call i64 @bar()
|
|
|
|
ret i64 %2
|
|
|
|
}
|
|
|
|
|
[CodeGenPrepare] Split branch conditions into multiple conditional branches.
This optimization transforms code like:
bb1:
%0 = icmp ne i32 %a, 0
%1 = icmp ne i32 %b, 0
%or.cond = or i1 %0, %1
br i1 %or.cond, label %TrueBB, label %FalseBB
into multiple branch instructions like:
bb1:
%0 = icmp ne i32 %a, 0
br i1 %0, label %TrueBB, label %bb2
bb2:
%1 = icmp ne i32 %b, 0
br i1 %1, label %TrueBB, label %FalseBB
This optimization is already performed by SelectionDAG, but not by FastISel.
FastISel cannot perform this optimization, because it cannot generate new
MachineBasicBlocks.
Performing this optimization at CodeGenPrepare time makes it available to both -
SelectionDAG and FastISel - and the implementation in SelectionDAG could be
removed. There are currently a few differences in codegen for X86 and PPC, so
this commit only enables it for FastISel.
Reviewed by Jim Grosbach
This fixes rdar://problem/19034919.
llvm-svn: 223786
2014-12-10 00:36:13 +08:00
|
|
|
declare i64 @bar()
|
|
|
|
|
IR: Make metadata typeless in assembly
Now that `Metadata` is typeless, reflect that in the assembly. These
are the matching assembly changes for the metadata/value split in
r223802.
- Only use the `metadata` type when referencing metadata from a call
intrinsic -- i.e., only when it's used as a `Value`.
- Stop pretending that `ValueAsMetadata` is wrapped in an `MDNode`
when referencing it from call intrinsics.
So, assembly like this:
define @foo(i32 %v) {
call void @llvm.foo(metadata !{i32 %v}, metadata !0)
call void @llvm.foo(metadata !{i32 7}, metadata !0)
call void @llvm.foo(metadata !1, metadata !0)
call void @llvm.foo(metadata !3, metadata !0)
call void @llvm.foo(metadata !{metadata !3}, metadata !0)
ret void, !bar !2
}
!0 = metadata !{metadata !2}
!1 = metadata !{i32* @global}
!2 = metadata !{metadata !3}
!3 = metadata !{}
turns into this:
define @foo(i32 %v) {
call void @llvm.foo(metadata i32 %v, metadata !0)
call void @llvm.foo(metadata i32 7, metadata !0)
call void @llvm.foo(metadata i32* @global, metadata !0)
call void @llvm.foo(metadata !3, metadata !0)
call void @llvm.foo(metadata !{!3}, metadata !0)
ret void, !bar !2
}
!0 = !{!2}
!1 = !{i32* @global}
!2 = !{!3}
!3 = !{}
I wrote an upgrade script that handled almost all of the tests in llvm
and many of the tests in cfe (even handling many `CHECK` lines). I've
attached it (or will attach it in a moment if you're speedy) to PR21532
to help everyone update their out-of-tree testcases.
This is part of PR21532.
llvm-svn: 224257
2014-12-16 03:07:53 +08:00
|
|
|
!0 = !{!"branch_weights", i32 5128, i32 32}
|
|
|
|
!1 = !{!"branch_weights", i32 1024, i32 4136}
|
2015-09-03 03:23:23 +08:00
|
|
|
!2 = !{}
|
|
|
|
|