2016-08-04 02:17:35 +08:00
|
|
|
; RUN: llc -verify-machineinstrs -print-before=peephole-opt -print-after=peephole-opt -mtriple=powerpc64-unknown-linux-gnu -o /dev/null 2>&1 < %s | FileCheck %s
|
2017-10-26 17:01:51 +08:00
|
|
|
; RUN: llc -verify-machineinstrs -print-before=peephole-opt -print-after=peephole-opt -mtriple=powerpc64le-unknown-linux-gnu -o /dev/null 2>&1 < %s | FileCheck %s
|
2016-04-12 11:10:52 +08:00
|
|
|
|
Summary
PPC backend eliminates compare instructions by using record-form instructions in PPCInstrInfo::optimizeCompareInstr, which is called from peephole optimization pass.
This patch improves this optimization to eliminate more compare instructions in two types of common case.
- comparison against a constant 1 or -1
The record-form instructions set CR bit based on signed comparison against 0. So, the current implementation does not exploit the record-form instruction for comparison against a non-zero constant.
This patch enables record-form optimization for constant of 1 or -1 if possible; it changes the condition "greater than -1" into "greater than or equal to 0" and "less than 1" into "less than or equal to 0".
With this patch, compare can be eliminated in the following code sequence, as an example.
uint64_t a, b;
if ((a | b) & 0x8000000000000000ull) { ... }
else { ... }
- andi for 32-bit comparison on PPC64
Record-form instructions execute a 64-bit signed comparison, so we are limited in eliminating 32-bit comparisons (i.e. those done with cmplwi) by using the record form. The original implementation already has such checks, but andi. is not recognized as an instruction that performs an implicit zero extension and is hence safe to convert into record form when used for an equality check.
%1 = and i32 %a, 10
%2 = icmp ne i32 %1, 0
br i1 %2, label %foo, label %bar
In this simple example, LLVM generates andi. + cmplwi + beq on PPC64.
This patch makes it possible to eliminate the cmplwi in this case.
I added andi. to the optimization targets when it is safe to do so.
Differential Revision: https://reviews.llvm.org/D30081
llvm-svn: 303500
2017-05-21 14:00:05 +08:00
|
|
|
; CHECK-LABEL: fn1
|
2016-04-12 11:10:52 +08:00
|
|
|
; fn1: multiplies %baz by 208, zero-extends the product to i64, keeps only the
; low 16 bits (shl by 48 followed by ashr by 48), and branches on whether the
; result is zero. Returns 1 on the %foo path and 0 on the %bar path.
; NOTE(review): the RUN lines dump MIR both before and after peephole-opt, so
; the first group of checks below presumably matches the "before" dump and the
; second group the "after" dump -- confirm against actual llc output.
define signext i32 @fn1(i32 %baz) {
|
|
|
|
%1 = mul nsw i32 %baz, 208
|
|
|
|
%2 = zext i32 %1 to i64
|
|
|
|
%3 = shl i64 %2, 48
|
|
|
|
%4 = ashr exact i64 %3, 48
|
2017-12-07 18:40:31 +08:00
|
|
|
; First group: the record-form AND defines %cr0 but it is marked dead, and a
; separate CMPLDI is expected ahead of the conditional branch.
; CHECK: ANDIo8 killed {{[^,]+}}, 65520, implicit-def dead %cr0;
|
2016-04-12 11:10:52 +08:00
|
|
|
; CHECK: CMPLDI
|
|
|
|
; CHECK: BCC
|
|
|
|
|
2017-12-07 18:40:31 +08:00
|
|
|
; Second group: the same AND now defines %cr0 without the dead flag, and a
; COPY of %cr0 is expected ahead of the conditional branch.
; CHECK: ANDIo8 {{[^,]+}}, 65520, implicit-def %cr0;
|
2017-11-29 01:15:09 +08:00
|
|
|
; CHECK: COPY %cr0
|
2016-04-12 11:10:52 +08:00
|
|
|
; CHECK: BCC
|
|
|
|
%5 = icmp eq i64 %4, 0
|
|
|
|
br i1 %5, label %foo, label %bar
|
|
|
|
|
|
|
|
foo:
|
|
|
|
ret i32 1
|
|
|
|
|
|
|
|
bar:
|
|
|
|
ret i32 0
|
|
|
|
}
|
Summary
PPC backend eliminates compare instructions by using record-form instructions in PPCInstrInfo::optimizeCompareInstr, which is called from peephole optimization pass.
This patch improves this optimization to eliminate more compare instructions in two types of common case.
- comparison against a constant 1 or -1
The record-form instructions set CR bit based on signed comparison against 0. So, the current implementation does not exploit the record-form instruction for comparison against a non-zero constant.
This patch enables record-form optimization for constant of 1 or -1 if possible; it changes the condition "greater than -1" into "greater than or equal to 0" and "less than 1" into "less than or equal to 0".
With this patch, compare can be eliminated in the following code sequence, as an example.
uint64_t a, b;
if ((a | b) & 0x8000000000000000ull) { ... }
else { ... }
- andi for 32-bit comparison on PPC64
Record-form instructions execute a 64-bit signed comparison, so we are limited in eliminating 32-bit comparisons (i.e. those done with cmplwi) by using the record form. The original implementation already has such checks, but andi. is not recognized as an instruction that performs an implicit zero extension and is hence safe to convert into record form when used for an equality check.
%1 = and i32 %a, 10
%2 = icmp ne i32 %1, 0
br i1 %2, label %foo, label %bar
In this simple example, LLVM generates andi. + cmplwi + beq on PPC64.
This patch makes it possible to eliminate the cmplwi in this case.
I added andi. to the optimization targets when it is safe to do so.
Differential Revision: https://reviews.llvm.org/D30081
llvm-svn: 303500
2017-05-21 14:00:05 +08:00
|
|
|
|
|
|
|
; CHECK-LABEL: fn2
|
|
|
|
; fn2: ORs the two i64 arguments and branches on whether the result is
; greater than -1 (signed). Returns 1 on the %foo path and 0 on the %bar path.
; This is a comparison against the non-zero constant -1; the checks below
; expect the branch to consume %cr0 produced by a record-form OR8o through a
; COPY into a crrc register.
; NOTE(review): the meaning of BCC predicate code 12 is not visible in this
; file -- confirm against the PPC predicate encoding.
define signext i32 @fn2(i64 %a, i64 %b) {
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK: OR8o {{[^, ]+}}, {{[^, ]+}}, implicit-def %cr0;
|
|
|
|
; CHECK: [[CREG:[^, ]+]]:crrc = COPY killed %cr0
|
|
|
|
; CHECK: BCC 12, killed [[CREG]]
|
Summary
PPC backend eliminates compare instructions by using record-form instructions in PPCInstrInfo::optimizeCompareInstr, which is called from peephole optimization pass.
This patch improves this optimization to eliminate more compare instructions in two types of common case.
- comparison against a constant 1 or -1
The record-form instructions set CR bit based on signed comparison against 0. So, the current implementation does not exploit the record-form instruction for comparison against a non-zero constant.
This patch enables record-form optimization for constant of 1 or -1 if possible; it changes the condition "greater than -1" into "greater than or equal to 0" and "less than 1" into "less than or equal to 0".
With this patch, compare can be eliminated in the following code sequence, as an example.
uint64_t a, b;
if ((a | b) & 0x8000000000000000ull) { ... }
else { ... }
- andi for 32-bit comparison on PPC64
Since record-form instructions execute 64-bit signed comparison and so we have limitation in eliminating 32-bit comparison, i.e. with cmplwi, using the record-form. The original implementation already has such checks but andi. is not recognized as an instruction which executes implicit zero extension and hence safe to convert into record-form if used for equality check.
%1 = and i32 %a, 10
%2 = icmp ne i32 %1, 0
br i1 %2, label %foo, label %bar
In this simple example, LLVM generates andi. + cmplwi + beq on PPC64.
This patch make it possible to eliminate the cmplwi for this case.
I added andi. for optimization targets if it is safe to do so.
Differential Revision: https://reviews.llvm.org/D30081
llvm-svn: 303500
2017-05-21 14:00:05 +08:00
|
|
|
%1 = or i64 %b, %a
|
|
|
|
; Signed "greater than -1" test on the OR result.
%2 = icmp sgt i64 %1, -1
|
|
|
|
br i1 %2, label %foo, label %bar
|
|
|
|
|
|
|
|
foo:
|
|
|
|
ret i32 1
|
|
|
|
|
|
|
|
bar:
|
|
|
|
ret i32 0
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: fn3
|
|
|
|
; fn3: ANDs the i32 argument with the constant 10 and branches on whether the
; result is non-zero. Returns 1 on the %foo path and 0 on the %bar path.
; The checks below expect a record-form ANDIo with immediate 10 defining %cr0,
; a COPY of %cr0 into a crrc register, and a BCC consuming that register.
; NOTE(review): the meaning of BCC predicate code 76 is not visible in this
; file -- confirm against the PPC predicate encoding.
define signext i32 @fn3(i32 %a) {
|
2017-12-07 18:40:31 +08:00
|
|
|
; CHECK: ANDIo killed {{[%0-9]+}}, 10, implicit-def %cr0;
|
|
|
|
; CHECK: [[CREG:[^, ]+]]:crrc = COPY %cr0
|
|
|
|
; CHECK: BCC 76, killed [[CREG]]
|
Summary
PPC backend eliminates compare instructions by using record-form instructions in PPCInstrInfo::optimizeCompareInstr, which is called from peephole optimization pass.
This patch improves this optimization to eliminate more compare instructions in two types of common case.
- comparison against a constant 1 or -1
The record-form instructions set CR bit based on signed comparison against 0. So, the current implementation does not exploit the record-form instruction for comparison against a non-zero constant.
This patch enables record-form optimization for constant of 1 or -1 if possible; it changes the condition "greater than -1" into "greater than or equal to 0" and "less than 1" into "less than or equal to 0".
With this patch, compare can be eliminated in the following code sequence, as an example.
uint64_t a, b;
if ((a | b) & 0x8000000000000000ull) { ... }
else { ... }
- andi for 32-bit comparison on PPC64
Since record-form instructions execute 64-bit signed comparison and so we have limitation in eliminating 32-bit comparison, i.e. with cmplwi, using the record-form. The original implementation already has such checks but andi. is not recognized as an instruction which executes implicit zero extension and hence safe to convert into record-form if used for equality check.
%1 = and i32 %a, 10
%2 = icmp ne i32 %1, 0
br i1 %2, label %foo, label %bar
In this simple example, LLVM generates andi. + cmplwi + beq on PPC64.
This patch make it possible to eliminate the cmplwi for this case.
I added andi. for optimization targets if it is safe to do so.
Differential Revision: https://reviews.llvm.org/D30081
llvm-svn: 303500
2017-05-21 14:00:05 +08:00
|
|
|
%1 = and i32 %a, 10
|
|
|
|
; 32-bit inequality test against zero on the masked value.
%2 = icmp ne i32 %1, 0
|
|
|
|
br i1 %2, label %foo, label %bar
|
|
|
|
|
|
|
|
foo:
|
|
|
|
ret i32 1
|
|
|
|
|
|
|
|
bar:
|
|
|
|
ret i32 0
|
|
|
|
}
|
2017-07-27 16:14:48 +08:00
|
|
|
|
|
|
|
; This test case confirms that a record-form instruction is
|
|
|
|
; generated even if the branch has a static branch hint.
|
|
|
|
|
|
|
|
; CHECK-LABEL: fn4
|
|
|
|
; fn4: adds the two i64 arguments and branches on whether the sum is zero.
; When it is zero, control reaches %if.then, which calls @exit(0) and is
; followed by `unreachable`; otherwise the sum is returned from %if.end.
; The checks below expect a record-form ADD8o, no CMP instruction after it,
; and a BCC with predicate code 71.
; NOTE(review): 71 presumably encodes the predicate together with a static
; branch hint -- confirm against the PPC predicate encoding.
define i64 @fn4(i64 %a, i64 %b) {
|
|
|
|
; CHECK: ADD8o
|
|
|
|
; CHECK-NOT: CMP
|
|
|
|
; CHECK: BCC 71
|
|
|
|
|
|
|
|
entry:
|
|
|
|
%add = add nsw i64 %b, %a
|
|
|
|
%cmp = icmp eq i64 %add, 0
|
|
|
|
br i1 %cmp, label %if.then, label %if.end
|
|
|
|
|
|
|
|
if.then:
|
|
|
|
; NOTE(review): attribute group #3 is referenced here but its definition is
; not visible in this part of the file -- verify it exists or is not needed.
tail call void @exit(i32 signext 0) #3
|
|
|
|
unreachable
|
|
|
|
|
|
|
|
if.end:
|
|
|
|
ret i64 %add
|
|
|
|
}
|
|
|
|
|
|
|
|
; External declaration of @exit (takes a sign-extended i32, returns void);
; it is called from the %if.then block of @fn4.
declare void @exit(i32 signext)
|
2017-10-18 18:31:19 +08:00
|
|
|
|
|
|
|
; Since %v1 and %v2 are zero-extended 32-bit values, %1 is also zero-extended.
|
|
|
|
; In this case, we want to use ORo instead of OR + CMPLWI.
|
|
|
|
|
|
|
|
; CHECK-LABEL: fn5
|
|
|
|
; fn5: loads two i32 values through %p1 and %p2, ORs them, and branches on
; whether the result is zero. Returns 1 on the %foo path and 0 on the %bar
; path. The checks below expect a record-form ORo, no CMP instruction after
; it, and then a BCC.
define zeroext i32 @fn5(i32* %p1, i32* %p2) {
|
|
|
|
; CHECK: ORo
|
|
|
|
; CHECK-NOT: CMP
|
|
|
|
; CHECK: BCC
|
|
|
|
%v1 = load i32, i32* %p1
|
|
|
|
%v2 = load i32, i32* %p2
|
|
|
|
%1 = or i32 %v1, %v2
|
|
|
|
; 32-bit equality test against zero on the OR of the two loaded values.
%2 = icmp eq i32 %1, 0
|
|
|
|
br i1 %2, label %foo, label %bar
|
|
|
|
|
|
|
|
foo:
|
|
|
|
ret i32 1
|
|
|
|
|
|
|
|
bar:
|
|
|
|
ret i32 0
|
|
|
|
}
|
2017-10-26 17:01:51 +08:00
|
|
|
|
|
|
|
; This test confirms record-form instructions are emitted for comparison
|
|
|
|
; against a non-zero value.
|
|
|
|
|
|
|
|
; CHECK-LABEL: fn6
|
|
|
|
; fn6: inspects the byte at %p-1 and, if needed, the byte at %p-2. Each byte
; is tested with a signed "greater than -1" comparison. The function returns
; the address of the first byte that passes the test, or null if neither
; does. The checks below expect, twice in a row: an LBZU load, a record-form
; EXTSBo, no CMP instruction, and then a BCC -- one sequence per tested byte.
define i8* @fn6(i8* readonly %p) {
|
|
|
|
; CHECK: LBZU
|
|
|
|
; CHECK: EXTSBo
|
|
|
|
; CHECK-NOT: CMP
|
|
|
|
; CHECK: BCC
|
|
|
|
; CHECK: LBZU
|
|
|
|
; CHECK: EXTSBo
|
|
|
|
; CHECK-NOT: CMP
|
|
|
|
; CHECK: BCC
|
|
|
|
|
|
|
|
entry:
|
|
|
|
%incdec.ptr = getelementptr inbounds i8, i8* %p, i64 -1
|
|
|
|
%0 = load i8, i8* %incdec.ptr
|
|
|
|
; First test: byte at %p-1 compared against -1 (signed).
%cmp = icmp sgt i8 %0, -1
|
|
|
|
br i1 %cmp, label %out, label %if.end
|
|
|
|
|
|
|
|
if.end:
|
|
|
|
%incdec.ptr2 = getelementptr inbounds i8, i8* %p, i64 -2
|
|
|
|
%1 = load i8, i8* %incdec.ptr2
|
|
|
|
; Second test: byte at %p-2 compared against -1 (signed).
%cmp4 = icmp sgt i8 %1, -1
|
|
|
|
br i1 %cmp4, label %out, label %cleanup
|
|
|
|
|
|
|
|
out:
|
|
|
|
; Address of whichever byte passed its test.
%p.addr.0 = phi i8* [ %incdec.ptr, %entry ], [ %incdec.ptr2, %if.end ]
|
|
|
|
br label %cleanup
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
; null when control arrives directly from %if.end (neither test passed).
%retval.0 = phi i8* [ %p.addr.0, %out ], [ null, %if.end ]
|
|
|
|
ret i8* %retval.0
|
|
|
|
}
|