2014-04-04 00:01:44 +08:00
|
|
|
; RUN: llc -mtriple=arm-apple-darwin %s -o - | FileCheck %s
|
|
|
|
|
|
|
|
; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
|
|
|
|
; RUN: | FileCheck %s --check-prefix=CHECK-VFP
|
|
|
|
|
|
|
|
; RUN: llc -mtriple=thumbv7-apple-darwin -mattr=+neon,+thumb2 %s -o - \
|
|
|
|
; RUN: | FileCheck %s --check-prefix=CHECK-NEON
|
2006-08-22 14:43:24 +08:00
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f1: returns 2 when %a.s equals 4, otherwise 3 (select on an integer
; equality compare against a constant). The default-prefix expectation inside
; the body looks for a conditional `moveq` in the llc output.
define i32 @f1(i32 %a.s) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f1:
|
2009-10-10 01:20:46 +08:00
|
|
|
;CHECK: moveq
|
2006-08-22 06:00:32 +08:00
|
|
|
entry:
|
2007-01-19 17:20:23 +08:00
|
|
|
; Equality compare of the argument with the immediate 4.
%tmp = icmp eq i32 %a.s, 4
|
|
|
|
; Pick between the two constants based on the compare result.
%tmp1.s = select i1 %tmp, i32 2, i32 3
|
|
|
|
ret i32 %tmp1.s
|
2006-08-22 06:00:32 +08:00
|
|
|
}
|
2006-09-21 21:06:26 +08:00
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f2: returns 2 when %a.s is greater than 4 as a signed value, otherwise 3.
; The default-prefix expectation inside the body looks for a conditional
; `movgt` in the llc output.
define i32 @f2(i32 %a.s) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f2:
|
2009-10-10 01:20:46 +08:00
|
|
|
;CHECK: movgt
|
2006-09-21 21:06:26 +08:00
|
|
|
entry:
|
2007-01-19 17:20:23 +08:00
|
|
|
; Signed greater-than compare of the argument with the immediate 4.
%tmp = icmp sgt i32 %a.s, 4
|
|
|
|
%tmp1.s = select i1 %tmp, i32 2, i32 3
|
|
|
|
ret i32 %tmp1.s
|
2006-09-21 21:06:26 +08:00
|
|
|
}
|
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f3: returns 2 when %a.s is less than %b.s as signed values, otherwise 3.
; Unlike f1/f2 the compare is register-against-register. The default-prefix
; expectation inside the body looks for a conditional `movlt`.
define i32 @f3(i32 %a.s, i32 %b.s) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f3:
|
2009-10-10 01:20:46 +08:00
|
|
|
;CHECK: movlt
|
2006-09-21 21:06:26 +08:00
|
|
|
entry:
|
2007-01-19 17:20:23 +08:00
|
|
|
; Signed less-than compare of the two arguments.
%tmp = icmp slt i32 %a.s, %b.s
|
|
|
|
%tmp1.s = select i1 %tmp, i32 2, i32 3
|
|
|
|
ret i32 %tmp1.s
|
2006-09-21 21:06:26 +08:00
|
|
|
}
|
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f4: returns 2 when %a.s is less than or equal to %b.s as signed values,
; otherwise 3. The default-prefix expectation inside the body looks for a
; conditional `movle`.
define i32 @f4(i32 %a.s, i32 %b.s) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f4:
|
2009-10-10 01:20:46 +08:00
|
|
|
;CHECK: movle
|
2006-09-21 21:06:26 +08:00
|
|
|
entry:
|
2007-01-19 17:20:23 +08:00
|
|
|
; Signed less-or-equal compare of the two arguments.
%tmp = icmp sle i32 %a.s, %b.s
|
|
|
|
%tmp1.s = select i1 %tmp, i32 2, i32 3
|
|
|
|
ret i32 %tmp1.s
|
2006-09-21 21:06:26 +08:00
|
|
|
}
|
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f5: returns 2 when %a.u is less than or equal to %b.u as unsigned values,
; otherwise 3. The default-prefix expectation inside the body looks for a
; conditional `movls` (the unsigned counterpart of f4's `movle`).
define i32 @f5(i32 %a.u, i32 %b.u) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f5:
|
2009-10-10 01:20:46 +08:00
|
|
|
;CHECK: movls
|
2006-09-21 21:06:26 +08:00
|
|
|
entry:
|
2007-01-19 17:20:23 +08:00
|
|
|
; Unsigned less-or-equal compare of the two arguments.
%tmp = icmp ule i32 %a.u, %b.u
|
|
|
|
%tmp1.s = select i1 %tmp, i32 2, i32 3
|
|
|
|
ret i32 %tmp1.s
|
2006-09-21 21:06:26 +08:00
|
|
|
}
|
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f6: returns 2 when %a.u is greater than %b.u as unsigned values, otherwise 3.
; The default-prefix expectation inside the body looks for a conditional
; `movhi`.
define i32 @f6(i32 %a.u, i32 %b.u) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f6:
|
2009-10-10 01:20:46 +08:00
|
|
|
;CHECK: movhi
|
2006-09-21 21:06:26 +08:00
|
|
|
entry:
|
2007-01-19 17:20:23 +08:00
|
|
|
; Unsigned greater-than compare of the two arguments.
%tmp = icmp ugt i32 %a.u, %b.u
|
|
|
|
%tmp1.s = select i1 %tmp, i32 2, i32 3
|
|
|
|
ret i32 %tmp1.s
|
|
|
|
}
|
|
|
|
|
2007-01-26 16:25:06 +08:00
|
|
|
; f7: returns -1.0 when %a is ordered-less-than 1.234, otherwise returns %b.
; Two sets of expectations live inside the body:
;   - default prefix (the arm-apple-darwin RUN line): `movlt` followed by
;     `movge`;
;   - VFP prefix (the arm-eabi +vfp2 RUN line): a single `vmovmi`.
define double @f7(double %a, double %b) {
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-LABEL: f7:
|
2018-02-01 06:55:19 +08:00
|
|
|
;CHECK: movlt
|
2013-10-12 02:09:19 +08:00
|
|
|
;CHECK: movge
|
2013-07-14 14:24:09 +08:00
|
|
|
;CHECK-VFP-LABEL: f7:
|
2009-11-09 08:11:35 +08:00
|
|
|
;CHECK-VFP: vmovmi
|
2007-01-19 17:20:23 +08:00
|
|
|
; Ordered less-than: false if either operand is NaN.
%tmp = fcmp olt double %a, 1.234e+00
|
|
|
|
; One arm is a constant, the other is the second argument.
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
|
|
|
|
ret double %tmp1
|
2006-09-21 21:06:26 +08:00
|
|
|
}
|
Consider this code snippet:
float t1(int argc) {
return (argc == 1123) ? 1.234f : 2.38213f;
}
We would generate truly awful code on ARM (those with a weak stomach should look
away):
_t1:
movw r1, #1123
movs r2, #1
movs r3, #0
cmp r0, r1
mov.w r0, #0
it eq
moveq r0, r2
movs r1, #4
cmp r0, #0
it ne
movne r3, r1
adr r0, #LCPI1_0
ldr r0, [r0, r3]
bx lr
The problem was that legalization was creating a cascade of SELECT_CC nodes for
the comparison of "argc == 1123" which was fed into a SELECT node for the ?:
statement which was itself converted to a SELECT_CC node. This is because the
ARM back-end doesn't have custom lowering for SELECT nodes, so it used the
default "Expand".
I added a fairly simple "LowerSELECT" to the ARM back-end. It takes care of this
testcase, but can obviously be expanded to include more cases.
Now we generate this, which looks optimal to me:
_t1:
movw r1, #1123
movs r2, #0
cmp r0, r1
adr r0, #LCPI0_0
it eq
moveq r2, #4
ldr r0, [r0, r2]
bx lr
.align 2
LCPI0_0:
.long 1075344593 @ float 2.382130e+00
.long 1067316150 @ float 1.234000e+00
llvm-svn: 110799
2010-08-11 16:43:16 +08:00
|
|
|
|
|
|
|
; <rdar://problem/7260094>
|
|
|
|
;
|
|
|
|
; We used to generate really horrible code for this function. The main cause was
|
|
|
|
; a lack of a custom lowering routine for an ISD::SELECT. This would result in
|
|
|
|
; two "it" blocks in the code: one for the "icmp" and another to move the index
|
|
|
|
; into the constant pool based on the value of the "icmp". If we have one "it"
|
|
|
|
; block generated, odds are good that we have close to the ideal code for this:
|
|
|
|
;
|
2013-10-11 06:37:47 +08:00
|
|
|
; CHECK-NEON-LABEL: f8:
|
2013-10-12 02:09:19 +08:00
|
|
|
; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0
|
2018-10-30 23:04:40 +08:00
|
|
|
; CHECK-NEON: movw [[R3:r[0-9]+]], #1123
|
2013-10-12 02:09:19 +08:00
|
|
|
; CHECK-NEON-NEXT: cmp r0, [[R3]]
|
2012-08-17 07:21:55 +08:00
|
|
|
; CHECK-NEON-NEXT: it eq
|
Use predication instead of pseudo-opcodes when folding into MOVCC.
Now that it is possible to dynamically tie MachineInstr operands,
predicated instructions are possible in SSA form:
%vreg3<def> = SUBri %vreg1, -2147483647, pred:14, pred:%noreg, %opt:%noreg
%vreg4<def,tied1> = MOVCCr %vreg3<tied0>, %vreg1, %pred:12, pred:%CPSR
Becomes a predicated SUBri with a tied imp-use:
SUBri %vreg1, -2147483647, pred:13, pred:%CPSR, opt:%noreg, %vreg1<imp-use,tied0>
This means that any instruction that is safe to move can be folded into
a MOVCC, and the *CC pseudo-instructions are no longer needed.
The test case changes reflect that Thumb2SizeReduce recognizes the
predicated instructions. It didn't understand the pseudos.
llvm-svn: 163274
2012-09-06 07:58:02 +08:00
|
|
|
; CHECK-NEON-NEXT: addeq{{.*}} [[R2]], #4
|
Consider this code snippet:
float t1(int argc) {
return (argc == 1123) ? 1.234f : 2.38213f;
}
We would generate truly awful code on ARM (those with a weak stomach should look
away):
_t1:
movw r1, #1123
movs r2, #1
movs r3, #0
cmp r0, r1
mov.w r0, #0
it eq
moveq r0, r2
movs r1, #4
cmp r0, #0
it ne
movne r3, r1
adr r0, #LCPI1_0
ldr r0, [r0, r3]
bx lr
The problem was that legalization was creating a cascade of SELECT_CC nodes for
the comparison of "argc == 1123" which was fed into a SELECT node for the ?:
statement which was itself converted to a SELECT_CC node. This is because the
ARM back-end doesn't have custom lowering for SELECT nodes, so it used the
default "Expand".
I added a fairly simple "LowerSELECT" to the ARM back-end. It takes care of this
testcase, but can obviously be expanded to include more cases.
Now we generate this, which looks optimal to me:
_t1:
movw r1, #1123
movs r2, #0
cmp r0, r1
adr r0, #LCPI0_0
it eq
moveq r2, #4
ldr r0, [r0, r2]
bx lr
.align 2
LCPI0_0:
.long 1075344593 @ float 2.382130e+00
.long 1067316150 @ float 1.234000e+00
llvm-svn: 110799
2010-08-11 16:43:16 +08:00
|
|
|
; CHECK-NEON-NEXT: ldr
|
|
|
|
; CHECK-NEON: bx
|
|
|
|
|
|
|
|
; f8: returns one of two float constants depending on whether %a equals 1123.
; The NEON-prefix expectations that precede this definition require a single
; "it eq" block (adr / movw / cmp / it eq / addeq / ldr) for this function.
; NOTE(review): the two hex literals are presumably 1.234f and 2.38213f from
; the C snippet quoted in the original commit message - confirm.
define arm_apcscc float @f8(i32 %a) nounwind {
|
|
|
|
; Compare against a constant that needs a `movw` to materialize.
%tmp = icmp eq i32 %a, 1123
|
|
|
|
%tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
|
|
|
|
ret float %tmp1
|
|
|
|
}
|
2011-03-08 09:17:20 +08:00
|
|
|
|
|
|
|
; <rdar://problem/9049552>
|
|
|
|
; Glue values can only have a single use, but the following test exposed a
|
|
|
|
; case where a SELECT was lowered with 2 uses of a comparison, causing the
|
|
|
|
; scheduler to assert.
|
2013-07-14 14:24:09 +08:00
|
|
|
; CHECK-VFP-LABEL: f9:
|
2011-03-08 09:17:20 +08:00
|
|
|
|
|
|
|
declare i8* @objc_msgSend(i8*, i8*, ...)
|
|
|
|
; f9: compile-only regression test. A single compare (%cmp) feeds two selects
; of different types (float and double); per the comment above this
; definition, lowering such a SELECT used to give the comparison two uses and
; trip a scheduler assertion. Only the function label is checked (VFP prefix);
; no instruction sequence is pinned in the visible source.
define void @f9() optsize {
|
|
|
|
entry:
|
|
|
|
; The one comparison shared by both selects below.
%cmp = icmp eq i8* undef, inttoptr (i32 4 to i8*)
|
|
|
|
; First use of %cmp: float select.
%conv191 = select i1 %cmp, float -3.000000e+00, float 0.000000e+00
|
|
|
|
; Second use of %cmp: double select.
%conv195 = select i1 %cmp, double -1.000000e+00, double 0.000000e+00
|
|
|
|
%add = fadd double %conv195, 1.100000e+01
|
|
|
|
%conv196 = fptrunc double %add to float
|
|
|
|
%add201 = fadd float undef, %conv191
|
|
|
|
; Both float results are reinterpreted as i32 and packed into a [2 x i32].
%tmp484 = bitcast float %conv196 to i32
|
|
|
|
%tmp478 = bitcast float %add201 to i32
|
|
|
|
%tmp490 = insertvalue [2 x i32] undef, i32 %tmp484, 0
|
|
|
|
%tmp493 = insertvalue [2 x i32] %tmp490, i32 %tmp478, 1
|
|
|
|
; Call through a bitcast of the variadic @objc_msgSend declaration so that
; both select results stay live.
call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize
|
|
|
|
ret void
|
|
|
|
}
|
2012-07-10 04:31:12 +08:00
|
|
|
|
2013-10-11 06:37:47 +08:00
|
|
|
; CHECK-LABEL: f10:
|
2012-07-10 04:31:12 +08:00
|
|
|
; f10: converts the result of (%a == %b) to float via zext to i32 followed by
; sitofp. The negative expectation inside the body requires that the string
; `floatsisf` does not appear in the output for this function.
; NOTE(review): `floatsisf` is presumably the int-to-float runtime helper the
; test wants to avoid calling - confirm.
define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
|
|
|
|
; CHECK-NOT: floatsisf
|
|
|
|
%1 = icmp eq i32 %a, %b
|
|
|
|
; Widen the i1 to i32 (0 or 1) before the signed conversion.
%2 = zext i1 %1 to i32
|
|
|
|
%3 = sitofp i32 %2 to float
|
|
|
|
ret float %3
|
|
|
|
}
|
|
|
|
|
2013-10-11 06:37:47 +08:00
|
|
|
; CHECK-LABEL: f11:
|
2012-07-10 04:31:12 +08:00
|
|
|
; f11: converts the result of (%a == %b) to float with a signed conversion
; applied directly to the i1 (no intermediate zext, unlike f10). The negative
; expectation inside the body requires that `floatsisf` does not appear in
; the output for this function.
define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
|
|
|
|
; CHECK-NOT: floatsisf
|
|
|
|
%1 = icmp eq i32 %a, %b
|
|
|
|
; Signed i1 -> float conversion.
%2 = sitofp i1 %1 to float
|
|
|
|
ret float %2
|
|
|
|
}
|
|
|
|
|
2013-10-11 06:37:47 +08:00
|
|
|
; CHECK-LABEL: f12:
|
2012-07-10 04:31:12 +08:00
|
|
|
; f12: converts the result of (%a == %b) to float with an unsigned conversion
; applied directly to the i1. The negative expectation inside the body
; requires that `floatunsisf` does not appear in the output for this function.
define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
|
|
|
|
; CHECK-NOT: floatunsisf
|
|
|
|
%1 = icmp eq i32 %a, %b
|
|
|
|
; Unsigned i1 -> float conversion.
%2 = uitofp i1 %1 to float
|
|
|
|
ret float %2
|
|
|
|
}
|
|
|
|
|
2018-12-03 19:16:21 +08:00
|
|
|
; CHECK-LABEL: test_overflow_recombine:
|
|
|
|
; test_overflow_recombine: returns the overflow flag of a signed
; multiply-with-overflow of the constant 0 and %in. The expectations inside
; the body pin the sequence: `smull` producing a low/high pair, `subs` of the
; high half against the low half arithmetically shifted right by 31, then a
; conditional `movne ..., #1` into the same register as the subs result.
define i1 @test_overflow_recombine(i32 %in) {
|
|
|
|
; CHECK: smull [[LO:r[0-9]+]], [[HI:r[0-9]+]]
|
|
|
|
; CHECK: subs [[ZERO:r[0-9]+]], [[HI]], [[LO]], asr #31
|
|
|
|
; CHECK: movne [[ZERO]], #1
|
|
|
|
; First operand is the literal 0; only the overflow bit of the result is used.
%prod = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 0, i32 %in)
|
|
|
|
; Field 1 of the returned struct is the i1 overflow flag.
%overflow = extractvalue { i32, i1 } %prod, 1
|
|
|
|
ret i1 %overflow
|
|
|
|
}
|
|
|
|
|
|
|
|
declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32)
|