2017-03-27 23:52:38 +08:00
|
|
|
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
|
|
|
|
|
|
|
|
; Thread-local i32 globals read by the tests in this file: @x by @test1 and
; @test2, @y by @test2 only.
@x = thread_local local_unnamed_addr global i32 0, align 4
|
|
|
|
@y = thread_local local_unnamed_addr global i32 0, align 4
|
|
|
|
|
|
|
|
; Machine LICM should hoist the mrs into the loop preheader.
|
|
|
|
; CHECK-LABEL: @test1
|
|
|
|
; CHECK: BB#1:
|
|
|
|
; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0
|
|
|
|
; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x
|
|
|
|
; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x
|
|
|
|
;
|
|
|
|
; CHECK: .LBB0_2:
|
|
|
|
; CHECK: ldr w0, [x[[REG2]]]
|
|
|
|
; CHECK: bl bar
|
[AArch64] Prefer Bcc to CBZ/CBNZ/TBZ/TBNZ when NZCV flags can be set for "free".
This patch contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions into a
conditional branch (Bcc), when the NZCV flags can be set for "free". This is
preferred on targets that have more flexibility when scheduling Bcc
instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming all other variables are
equal). This can reduce register pressure and is also the default behavior for
GCC.
A few examples:
add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS.
cbz w8, .LBB0_2 -> b.eq .LBB0_2 ; single def/use of w8 removed.
add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses.
cbz w8, .LBB1_2 -> b.eq .LBB1_2
sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
In looking at all current sub-target machine descriptions, this transformation
appears to be either positive or neutral.
Differential Revision: https://reviews.llvm.org/D34220.
llvm-svn: 306144
2017-06-24 03:20:12 +08:00
|
|
|
; CHECK: subs w[[REG3:[0-9]+]], w{{[0-9]+}}, #1
|
|
|
|
; CHECK: b.ne .LBB0_2
|
2017-03-27 23:52:38 +08:00
|
|
|
|
|
|
|
; test1: when %n is positive, run a counted loop that reloads the thread-local
; @x and passes the value to @bar on every iteration; otherwise return at once.
define void @test1(i32 %n) local_unnamed_addr {
|
|
|
|
entry:
|
|
|
|
; Guard: the loop is entered only when %n > 0 (signed compare).
%cmp3 = icmp sgt i32 %n, 0
|
|
|
|
br i1 %cmp3, label %bb1, label %bb2
|
|
|
|
|
|
|
|
; bb1 is the only predecessor of for.body outside the loop; it just branches in.
bb1:
|
|
|
|
br label %for.body
|
|
|
|
|
|
|
|
for.body:
|
|
|
|
; Induction variable: 0 on entry from bb1, %inc on the back edge.
%i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ]
|
|
|
|
; The thread-local load of @x sits inside the loop body in the IR.
%0 = load i32, i32* @x, align 4
|
|
|
|
tail call void @bar(i32 %0) #2
|
|
|
|
%inc = add nuw nsw i32 %i.04, 1
|
|
|
|
; Exit once the incremented counter equals %n.
%exitcond = icmp eq i32 %inc, %n
|
|
|
|
br i1 %exitcond, label %bb2, label %for.body
|
|
|
|
|
|
|
|
; Common exit for both the skipped-loop path and the loop-finished path.
bb2:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Machine CSE should combine the mrs between the loads of @x and @y.
|
|
|
|
; CHECK-LABEL: @test2
|
|
|
|
; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0
|
|
|
|
; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0
|
|
|
|
; CHECK: ret
|
|
|
|
; test2: always load thread-local @x and pass it to @bar; when %c is non-zero,
; additionally load thread-local @y and pass it to @bar before returning.
define void @test2(i32 %c) local_unnamed_addr #0 {
|
|
|
|
entry:
|
|
|
|
; First thread-local access (@x), executed unconditionally.
%0 = load i32, i32* @x, align 4
|
|
|
|
tail call void @bar(i32 %0) #2
|
|
|
|
; %c == 0 skips straight to if.end; any other value takes if.then.
%cmp = icmp eq i32 %c, 0
|
|
|
|
br i1 %cmp, label %if.end, label %if.then
|
|
|
|
|
|
|
|
if.then:
|
|
|
|
; Second thread-local access (@y), reached only through entry.
%1 = load i32, i32* @y, align 4
|
|
|
|
tail call void @bar(i32 %1) #2
|
|
|
|
br label %if.end
|
|
|
|
|
|
|
|
if.end:
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; External function called by @test1 and @test2; only its declaration is given here.
declare void @bar(i32) local_unnamed_addr
|