; NOTE(review): the commit-message text below leaked into this test file
; during an import/merge; it is preserved here as a comment so the file
; remains valid LLVM IR.
;
; [AArch64] Prefer Bcc to CBZ/CBNZ/TBZ/TBNZ when NZCV flags can be set for "free".
; This patch contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions into a
; conditional branch (Bcc), when the NZCV flags can be set for "free". This is
; preferred on targets that have more flexibility when scheduling Bcc
; instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming all other variables are
; equal). This can reduce register pressure and is also the default behavior for
; GCC.
; A few examples:
;   add w8, w0, w1       -> cmn w0, w1        ; CMN is an alias of ADDS.
;   cbz w8, .LBB_2       -> b.eq .LBB0_2      ; single def/use of w8 removed.
;   add w8, w0, w1       -> adds w8, w0, w1   ; w8 has multiple uses.
;   cbz w8, .LBB1_2      -> b.eq .LBB1_2
;   sub w8, w0, w1       -> subs w8, w0, w1   ; w8 has multiple uses.
;   tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
; In looking at all current sub-target machine descriptions, this transformation
; appears to be either positive or neutral.
; Differential Revision: https://reviews.llvm.org/D34220
; llvm-svn: 306144
; RUN: llc < %s -O3 -mtriple=aarch64-eabi -verify-machineinstrs | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linaro-linux-gnueabi"
; Single use of the add result: the add folds into CMN (an alias of ADDS)
; and the cbz becomes b.eq on the flags it sets.
; CMN is an alias of ADDS.
; CHECK-LABEL: test_add_cbz:
; CHECK: cmn w0, w1
; CHECK: b.eq
; CHECK: ret
define void @test_add_cbz(i32 %a, i32 %b, i32* %ptr) {
  %c = add nsw i32 %a, %b
  %d = icmp ne i32 %c, 0
  br i1 %d, label %L1, label %L2

L1:
  store i32 0, i32* %ptr, align 4
  ret void

L2:
  store i32 1, i32* %ptr, align 4
  ret void
}
; The add result %c has a second use (the store in L2), so ADDS must keep
; writing the register instead of folding to CMN.
; CHECK-LABEL: test_add_cbz_multiple_use:
; CHECK: adds
; CHECK: b.eq
; CHECK: ret
define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) {
  %c = add nsw i32 %a, %b
  %d = icmp ne i32 %c, 0
  br i1 %d, label %L1, label %L2

L1:
  store i32 0, i32* %ptr, align 4
  ret void

L2:
  store i32 %c, i32* %ptr, align 4
  ret void
}
; Same as test_add_cbz but for 64-bit registers (x0/x1).
; CHECK-LABEL: test_add_cbz_64:
; CHECK: cmn x0, x1
; CHECK: b.eq
define void @test_add_cbz_64(i64 %a, i64 %b, i64* %ptr) {
  %c = add nsw i64 %a, %b
  %d = icmp ne i64 %c, 0
  br i1 %d, label %L1, label %L2

L1:
  store i64 0, i64* %ptr, align 4
  ret void

L2:
  store i64 1, i64* %ptr, align 4
  ret void
}
; and + cbz with a dead mask result becomes TST (alias of ANDS wzr) + b.eq.
; CHECK-LABEL: test_and_cbz:
; CHECK: tst w0, #0x6
; CHECK: b.eq
define void @test_and_cbz(i32 %a, i32* %ptr) {
  %c = and i32 %a, 6
  %d = icmp ne i32 %c, 0
  br i1 %d, label %L1, label %L2

L1:
  store i32 0, i32* %ptr, align 4
  ret void

L2:
  store i32 1, i32* %ptr, align 4
  ret void
}
; (a & b) == b is tested as b & ~a == 0, i.e. BICS with the zero register,
; and the cbnz becomes b.ne.
; CHECK-LABEL: test_bic_cbnz:
; CHECK: bics wzr, w1, w0
; CHECK: b.ne
define void @test_bic_cbnz(i32 %a, i32 %b, i32* %ptr) {
  %c = and i32 %a, %b
  %d = icmp eq i32 %c, %b
  br i1 %d, label %L1, label %L2

L1:
  store i32 0, i32* %ptr, align 4
  ret void

L2:
  store i32 1, i32* %ptr, align 4
  ret void
}
; tbz on the sign bit (#31) of an adds result becomes b.pl.
; CHECK-LABEL: test_add_tbz:
; CHECK: adds
; CHECK: b.pl
; CHECK: ret
define void @test_add_tbz(i32 %a, i32 %b, i32* %ptr) {
entry:
  %add = add nsw i32 %a, %b
  %cmp36 = icmp sge i32 %add, 0
  br i1 %cmp36, label %L2, label %L1

L1:
  store i32 %add, i32* %ptr, align 8
  br label %L2

L2:
  ret void
}
; tbz on the sign bit of a subs result becomes b.pl.
; CHECK-LABEL: test_subs_tbz:
; CHECK: subs
; CHECK: b.pl
; CHECK: ret
define void @test_subs_tbz(i32 %a, i32 %b, i32* %ptr) {
entry:
  %sub = sub nsw i32 %a, %b
  %cmp36 = icmp sge i32 %sub, 0
  br i1 %cmp36, label %L2, label %L1

L1:
  store i32 %sub, i32* %ptr, align 8
  br label %L2

L2:
  ret void
}
; tbnz on the sign bit of an adds result becomes b.mi.
; CHECK-LABEL: test_add_tbnz
; CHECK: adds
; CHECK: b.mi
; CHECK: ret
define void @test_add_tbnz(i32 %a, i32 %b, i32* %ptr) {
entry:
  %add = add nsw i32 %a, %b
  %cmp36 = icmp slt i32 %add, 0
  br i1 %cmp36, label %L2, label %L1

L1:
  store i32 %add, i32* %ptr, align 8
  br label %L2

L2:
  ret void
}
; tbnz on the sign bit of a subs result becomes b.mi.
; CHECK-LABEL: test_subs_tbnz
; CHECK: subs
; CHECK: b.mi
; CHECK: ret
define void @test_subs_tbnz(i32 %a, i32 %b, i32* %ptr) {
entry:
  %sub = sub nsw i32 %a, %b
  %cmp36 = icmp slt i32 %sub, 0
  br i1 %cmp36, label %L2, label %L1

L1:
  store i32 %sub, i32* %ptr, align 8
  br label %L2

L2:
  ret void
}
declare void @foo()
declare void @bar(i32)
; Don't transform since the call will clobber the NZCV bits.
; CHECK-LABEL: test_call_clobber:
; CHECK: and w[[DST:[0-9]+]], w1, #0x6
; CHECK: bl bar
; CHECK: cbnz w[[DST]]
define void @test_call_clobber(i32 %unused, i32 %a) {
entry:
  %c = and i32 %a, 6
  call void @bar(i32 %c)
  %tobool = icmp eq i32 %c, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  tail call void @foo()
  unreachable

if.end:
  ret void
}