llvm-project/llvm/test/Transforms/InstCombine/select.ll

1507 lines
47 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
2007-11-26 05:27:53 +08:00
; PR1822
target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
define i32 @test1(i32 %A, i32 %B) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: ret i32 [[B:%.*]]
;
%C = select i1 false, i32 %A, i32 %B
ret i32 %C
}
define i32 @test2(i32 %A, i32 %B) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: ret i32 [[A:%.*]]
;
%C = select i1 true, i32 %A, i32 %B
ret i32 %C
}
define i32 @test3(i1 %C, i32 %I) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: ret i32 [[I:%.*]]
;
%V = select i1 %C, i32 %I, i32 %I
ret i32 %V
}
define i1 @test4(i1 %C) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: ret i1 [[C:%.*]]
;
%V = select i1 %C, i1 true, i1 false
ret i1 %V
}
define i1 @test5(i1 %C) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT: ret i1 [[NOT_C]]
;
%V = select i1 %C, i1 false, i1 true
ret i1 %V
}
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
define i32 @test6(i1 %C) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: [[V:%.*]] = zext i1 [[C:%.*]] to i32
; CHECK-NEXT: ret i32 [[V]]
;
%V = select i1 %C, i32 1, i32 0
ret i32 %V
}
define i1 @trueval_is_true(i1 %C, i1 %X) {
; CHECK-LABEL: @trueval_is_true(
; CHECK-NEXT: [[R:%.*]] = or i1 [[C:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%R = select i1 %C, i1 true, i1 %X
ret i1 %R
}
define <2 x i1> @trueval_is_true_vec(<2 x i1> %C, <2 x i1> %X) {
; CHECK-LABEL: @trueval_is_true_vec(
; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[C:%.*]], [[X:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%R = select <2 x i1> %C, <2 x i1> <i1 true, i1 true>, <2 x i1> %X
ret <2 x i1> %R
}
define <2 x i1> @trueval_is_true_vec_undef_elt(<2 x i1> %C, <2 x i1> %X) {
; CHECK-LABEL: @trueval_is_true_vec_undef_elt(
; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[C:%.*]], [[X:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%R = select <2 x i1> %C, <2 x i1> <i1 undef, i1 true>, <2 x i1> %X
ret <2 x i1> %R
}
define i1 @test8(i1 %C, i1 %X) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[R:%.*]] = and i1 [[C:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%R = select i1 %C, i1 %X, i1 false
ret i1 %R
}
define <2 x i1> @test8vec(<2 x i1> %C, <2 x i1> %X) {
; CHECK-LABEL: @test8vec(
; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[C:%.*]], [[X:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> <i1 false, i1 false>
ret <2 x i1> %R
}
define i1 @test9(i1 %C, i1 %X) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_C]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%R = select i1 %C, i1 false, i1 %X
ret i1 %R
}
define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) {
; CHECK-LABEL: @test9vec(
; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], <i1 true, i1 true>
; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[NOT_C]], [[X:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%R = select <2 x i1> %C, <2 x i1> <i1 false, i1 false>, <2 x i1> %X
ret <2 x i1> %R
}
define i1 @test10(i1 %C, i1 %X) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT: [[R:%.*]] = or i1 [[NOT_C]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%R = select i1 %C, i1 %X, i1 true
ret i1 %R
}
define <2 x i1> @test10vec(<2 x i1> %C, <2 x i1> %X) {
; CHECK-LABEL: @test10vec(
; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> [[C:%.*]], <i1 true, i1 true>
; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[NOT_C]], [[X:%.*]]
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> <i1 true, i1 true>
ret <2 x i1> %R
}
define i1 @test23(i1 %a, i1 %b) {
; CHECK-LABEL: @test23(
; CHECK-NEXT: [[C:%.*]] = and i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%c = select i1 %a, i1 %b, i1 %a
ret i1 %c
}
define <2 x i1> @test23vec(<2 x i1> %a, <2 x i1> %b) {
; CHECK-LABEL: @test23vec(
; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%c = select <2 x i1> %a, <2 x i1> %b, <2 x i1> %a
ret <2 x i1> %c
}
define i1 @test24(i1 %a, i1 %b) {
; CHECK-LABEL: @test24(
; CHECK-NEXT: [[C:%.*]] = or i1 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%c = select i1 %a, i1 %a, i1 %b
ret i1 %c
}
define <2 x i1> @test24vec(<2 x i1> %a, <2 x i1> %b) {
; CHECK-LABEL: @test24vec(
; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%c = select <2 x i1> %a, <2 x i1> %a, <2 x i1> %b
ret <2 x i1> %c
}
define i1 @test62(i1 %A, i1 %B) {
; CHECK-LABEL: @test62(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[A:%.*]], true
; CHECK-NEXT: [[C:%.*]] = and i1 [[NOT]], [[B:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%not = xor i1 %A, true
%C = select i1 %A, i1 %not, i1 %B
ret i1 %C
}
define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) {
; CHECK-LABEL: @test62vec(
; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], <i1 true, i1 true>
; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[NOT]], [[B:%.*]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%not = xor <2 x i1> %A, <i1 true, i1 true>
%C = select <2 x i1> %A, <2 x i1> %not, <2 x i1> %B
ret <2 x i1> %C
}
define i1 @test63(i1 %A, i1 %B) {
; CHECK-LABEL: @test63(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[A:%.*]], true
; CHECK-NEXT: [[C:%.*]] = or i1 [[NOT]], [[B:%.*]]
; CHECK-NEXT: ret i1 [[C]]
;
%not = xor i1 %A, true
%C = select i1 %A, i1 %B, i1 %not
ret i1 %C
}
define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) {
; CHECK-LABEL: @test63vec(
; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[A:%.*]], <i1 true, i1 true>
; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[NOT]], [[B:%.*]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%not = xor <2 x i1> %A, <i1 true, i1 true>
%C = select <2 x i1> %A, <2 x i1> %B, <2 x i1> %not
ret <2 x i1> %C
}
2004-04-10 02:19:29 +08:00
define i32 @test11(i32 %a) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: [[R:%.*]] = zext i1 [[C]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
%C = icmp eq i32 %a, 0
%R = select i1 %C, i32 0, i32 1
ret i32 %R
2004-04-10 02:19:29 +08:00
}
define i32 @test12(i1 %cond, i32 %a) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: [[B:%.*]] = zext i1 [[COND:%.*]] to i32
; CHECK-NEXT: [[C:%.*]] = or i32 [[B]], [[A:%.*]]
; CHECK-NEXT: ret i32 [[C]]
;
%b = or i32 %a, 1
%c = select i1 %cond, i32 %b, i32 %a
ret i32 %c
}
define <2 x i32> @test12vec(<2 x i1> %cond, <2 x i32> %a) {
; CHECK-LABEL: @test12vec(
; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[COND:%.*]] to <2 x i32>
; CHECK-NEXT: [[C:%.*]] = or <2 x i32> [[B]], [[A:%.*]]
; CHECK-NEXT: ret <2 x i32> [[C]]
;
%b = or <2 x i32> %a, <i32 1, i32 1>
%c = select <2 x i1> %cond, <2 x i32> %b, <2 x i32> %a
ret <2 x i32> %c
}
define i32 @test12a(i1 %cond, i32 %a) {
; CHECK-LABEL: @test12a(
; CHECK-NEXT: [[B:%.*]] = zext i1 [[COND:%.*]] to i32
; CHECK-NEXT: [[C:%.*]] = ashr i32 [[A:%.*]], [[B]]
; CHECK-NEXT: ret i32 [[C]]
;
%b = ashr i32 %a, 1
%c = select i1 %cond, i32 %b, i32 %a
ret i32 %c
}
define <2 x i32> @test12avec(<2 x i1> %cond, <2 x i32> %a) {
; CHECK-LABEL: @test12avec(
; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[COND:%.*]] to <2 x i32>
; CHECK-NEXT: [[C:%.*]] = ashr <2 x i32> [[A:%.*]], [[B]]
; CHECK-NEXT: ret <2 x i32> [[C]]
;
%b = ashr <2 x i32> %a, <i32 1, i32 1>
%c = select <2 x i1> %cond, <2 x i32> %b, <2 x i32> %a
ret <2 x i32> %c
}
define i32 @test12b(i1 %cond, i32 %a) {
; CHECK-LABEL: @test12b(
; CHECK-NEXT: [[NOT_COND:%.*]] = xor i1 [[COND:%.*]], true
; CHECK-NEXT: [[B:%.*]] = zext i1 [[NOT_COND]] to i32
; CHECK-NEXT: [[D:%.*]] = ashr i32 [[A:%.*]], [[B]]
2016-07-20 06:32:15 +08:00
; CHECK-NEXT: ret i32 [[D]]
;
%b = ashr i32 %a, 1
%d = select i1 %cond, i32 %a, i32 %b
ret i32 %d
}
define <2 x i32> @test12bvec(<2 x i1> %cond, <2 x i32> %a) {
; CHECK-LABEL: @test12bvec(
; CHECK-NEXT: [[NOT_COND:%.*]] = xor <2 x i1> [[COND:%.*]], <i1 true, i1 true>
; CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[NOT_COND]] to <2 x i32>
; CHECK-NEXT: [[D:%.*]] = ashr <2 x i32> [[A:%.*]], [[B]]
; CHECK-NEXT: ret <2 x i32> [[D]]
;
%b = ashr <2 x i32> %a, <i32 1, i32 1>
%d = select <2 x i1> %cond, <2 x i32> %a, <2 x i32> %b
ret <2 x i32> %d
}
define i32 @test13(i32 %a, i32 %b) {
; CHECK-LABEL: @test13(
; CHECK-NEXT: ret i32 [[B:%.*]]
;
%C = icmp eq i32 %a, %b
%V = select i1 %C, i32 %a, i32 %b
ret i32 %V
2004-04-11 06:21:14 +08:00
}
define i32 @test13a(i32 %a, i32 %b) {
; CHECK-LABEL: @test13a(
; CHECK-NEXT: ret i32 [[A:%.*]]
;
%C = icmp ne i32 %a, %b
%V = select i1 %C, i32 %a, i32 %b
ret i32 %V
2004-04-11 06:21:14 +08:00
}
define i32 @test13b(i32 %a, i32 %b) {
; CHECK-LABEL: @test13b(
; CHECK-NEXT: ret i32 [[A:%.*]]
;
%C = icmp eq i32 %a, %b
%V = select i1 %C, i32 %b, i32 %a
ret i32 %V
2004-04-11 06:21:14 +08:00
}
define i1 @test14a(i1 %C, i32 %X) {
; CHECK-LABEL: @test14a(
; CHECK-NEXT: [[R1:%.*]] = icmp slt i32 [[X:%.*]], 1
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT: [[R:%.*]] = or i1 [[R1]], [[NOT_C]]
; CHECK-NEXT: ret i1 [[R]]
;
%V = select i1 %C, i32 %X, i32 0
; (X < 1) | !C
%R = icmp slt i32 %V, 1
ret i1 %R
}
define i1 @test14b(i1 %C, i32 %X) {
; CHECK-LABEL: @test14b(
; CHECK-NEXT: [[R1:%.*]] = icmp slt i32 [[X:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = or i1 [[R1]], [[C:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%V = select i1 %C, i32 0, i32 %X
; (X < 1) | C
%R = icmp slt i32 %V, 1
ret i1 %R
}
define i32 @test16(i1 %C, i32* %P) {
; CHECK-LABEL: @test16(
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%P2 = select i1 %C, i32* %P, i32* null
%V = load i32, i32* %P2
ret i32 %V
}
2005-04-23 23:31:03 +08:00
;; It may be legal to load from a null address in a non-zero address space
define i32 @test16_neg(i1 %C, i32 addrspace(1)* %P) {
; CHECK-LABEL: @test16_neg(
; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32 addrspace(1)* [[P:%.*]], i32 addrspace(1)* null
; CHECK-NEXT: [[V:%.*]] = load i32, i32 addrspace(1)* [[P2]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
%V = load i32, i32 addrspace(1)* %P2
ret i32 %V
}
define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
; CHECK-LABEL: @test16_neg2(
; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32 addrspace(1)* null, i32 addrspace(1)* [[P:%.*]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32 addrspace(1)* [[P2]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
%V = load i32, i32 addrspace(1)* %P2
ret i32 %V
}
;; It may be legal to load from a null address with null pointer valid attribute.
define i32 @test16_no_null_opt(i1 %C, i32* %P) #0 {
; CHECK-LABEL: @test16_no_null_opt(
; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32* [[P:%.*]], i32* null
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P2]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%P2 = select i1 %C, i32* %P, i32* null
%V = load i32, i32* %P2
ret i32 %V
}
define i32 @test16_no_null_opt_2(i1 %C, i32* %P) #0 {
; CHECK-LABEL: @test16_no_null_opt_2(
; CHECK-NEXT: [[P2:%.*]] = select i1 [[C:%.*]], i32* null, i32* [[P:%.*]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P2]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%P2 = select i1 %C, i32* null, i32* %P
%V = load i32, i32* %P2
ret i32 %V
}
attributes #0 = { "null-pointer-is-valid"="true" }
define i1 @test17(i32* %X, i1 %C) {
; CHECK-LABEL: @test17(
; CHECK-NEXT: [[RV1:%.*]] = icmp eq i32* [[X:%.*]], null
; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT: [[RV:%.*]] = or i1 [[RV1]], [[NOT_C]]
; CHECK-NEXT: ret i1 [[RV]]
;
%R = select i1 %C, i32* %X, i32* null
%RV = icmp eq i32* %R, null
ret i1 %RV
2005-04-23 23:31:03 +08:00
}
2006-09-10 04:26:04 +08:00
define i32 @test18(i32 %X, i32 %Y, i1 %C) {
; CHECK-LABEL: @test18(
; CHECK-NEXT: [[V:%.*]] = sdiv i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i32 [[V]]
;
%R = select i1 %C, i32 %X, i32 0
%V = sdiv i32 %Y, %R
ret i32 %V
2006-09-10 04:26:04 +08:00
}
2006-09-19 14:16:46 +08:00
define i32 @test19(i32 %x) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT: ret i32 [[X_LOBIT]]
;
%tmp = icmp ugt i32 %x, 2147483647
%retval = select i1 %tmp, i32 -1, i32 0
ret i32 %retval
2006-09-19 14:16:46 +08:00
}
define i32 @test20(i32 %x) {
; CHECK-LABEL: @test20(
; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT: ret i32 [[X_LOBIT]]
;
%tmp = icmp slt i32 %x, 0
%retval = select i1 %tmp, i32 -1, i32 0
ret i32 %retval
2006-09-19 14:16:46 +08:00
}
define i64 @test21(i32 %x) {
; CHECK-LABEL: @test21(
; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X_LOBIT]] to i64
; CHECK-NEXT: ret i64 [[TMP1]]
;
%tmp = icmp slt i32 %x, 0
%retval = select i1 %tmp, i64 -1, i64 0
ret i64 %retval
2006-09-19 14:17:55 +08:00
}
define i16 @test22(i32 %x) {
; CHECK-LABEL: @test22(
; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 [[X:%.*]], 31
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X_LOBIT]] to i16
; CHECK-NEXT: ret i16 [[TMP1]]
;
%tmp = icmp slt i32 %x, 0
%retval = select i1 %tmp, i16 -1, i16 0
ret i16 %retval
2006-09-19 14:17:55 +08:00
}
define i32 @test25(i1 %c) {
; CHECK-LABEL: @test25(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
; CHECK: jump:
; CHECK-NEXT: br label [[RET]]
; CHECK: ret:
; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, [[JUMP]] ], [ 20, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[A]]
;
entry:
br i1 %c, label %jump, label %ret
jump:
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
br label %ret
ret:
%a = phi i1 [true, %jump], [false, %entry]
%b = select i1 %a, i32 10, i32 20
ret i32 %b
}
define i32 @test26(i1 %cond) {
; CHECK-LABEL: @test26(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
; CHECK: jump:
; CHECK-NEXT: br label [[RET]]
; CHECK: ret:
; CHECK-NEXT: [[A:%.*]] = phi i32 [ 20, [[ENTRY:%.*]] ], [ 10, [[JUMP]] ]
; CHECK-NEXT: ret i32 [[A]]
;
entry:
br i1 %cond, label %jump, label %ret
jump:
%c = or i1 false, false
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
br label %ret
ret:
%a = phi i1 [true, %entry], [%c, %jump]
%b = select i1 %a, i32 20, i32 10
ret i32 %b
}
define i32 @test27(i1 %c, i32 %A, i32 %B) {
; CHECK-LABEL: @test27(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
; CHECK: jump:
; CHECK-NEXT: br label [[RET]]
; CHECK: ret:
; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[A:%.*]], [[JUMP]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[P]]
;
entry:
br i1 %c, label %jump, label %ret
jump:
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
br label %ret
ret:
%p = phi i1 [true, %jump], [false, %entry]
%s = select i1 %p, i32 %A, i32 %B
ret i32 %s
}
define i32 @test28(i1 %cond, i32 %A, i32 %B) {
; CHECK-LABEL: @test28(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
; CHECK: jump:
; CHECK-NEXT: br label [[RET]]
; CHECK: ret:
; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[A:%.*]], [[JUMP]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[P]]
;
entry:
br i1 %cond, label %jump, label %ret
jump:
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
br label %ret
ret:
%c = phi i32 [%A, %jump], [%B, %entry]
%p = phi i1 [true, %jump], [false, %entry]
%s = select i1 %p, i32 %A, i32 %c
ret i32 %s
}
define i32 @test29(i1 %cond, i32 %A, i32 %B) {
; CHECK-LABEL: @test29(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[COND:%.*]], label [[JUMP:%.*]], label [[RET:%.*]]
; CHECK: jump:
; CHECK-NEXT: br label [[RET]]
; CHECK: ret:
; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[A:%.*]], [[JUMP]] ], [ [[B:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[NEXT:%.*]]
; CHECK: next:
; CHECK-NEXT: ret i32 [[P]]
;
entry:
br i1 %cond, label %jump, label %ret
jump:
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
br label %ret
ret:
%c = phi i32 [%A, %jump], [%B, %entry]
%p = phi i1 [true, %jump], [false, %entry]
br label %next
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
next:
%s = select i1 %p, i32 %A, i32 %c
ret i32 %s
}
; SMAX(SMAX(x, y), x) -> SMAX(x, y)
define i32 @test30(i32 %x, i32 %y) {
; CHECK-LABEL: @test30(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]]
; CHECK-NEXT: ret i32 [[COND]]
;
%cmp = icmp sgt i32 %x, %y
%cond = select i1 %cmp, i32 %x, i32 %y
%cmp5 = icmp sgt i32 %cond, %x
%retval = select i1 %cmp5, i32 %cond, i32 %x
ret i32 %retval
}
; UMAX(UMAX(x, y), x) -> UMAX(x, y)
define i32 @test31(i32 %x, i32 %y) {
; CHECK-LABEL: @test31(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[Y]]
; CHECK-NEXT: ret i32 [[COND]]
;
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
%cmp = icmp ugt i32 %x, %y
%cond = select i1 %cmp, i32 %x, i32 %y
%cmp5 = icmp ugt i32 %cond, %x
%retval = select i1 %cmp5, i32 %cond, i32 %x
ret i32 %retval
}
; SMIN(SMIN(x, y), x) -> SMIN(x, y)
define i32 @test32(i32 %x, i32 %y) {
; CHECK-LABEL: @test32(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[X]]
; CHECK-NEXT: ret i32 [[COND]]
;
%cmp = icmp sgt i32 %x, %y
%cond = select i1 %cmp, i32 %y, i32 %x
%cmp5 = icmp sgt i32 %cond, %x
%retval = select i1 %cmp5, i32 %x, i32 %cond
ret i32 %retval
}
; MAX(MIN(x, y), x) -> x
define i32 @test33(i32 %x, i32 %y) {
; CHECK-LABEL: @test33(
; CHECK-NEXT: ret i32 [[X:%.*]]
;
%cmp = icmp sgt i32 %x, %y
%cond = select i1 %cmp, i32 %y, i32 %x
%cmp5 = icmp sgt i32 %cond, %x
%retval = select i1 %cmp5, i32 %cond, i32 %x
ret i32 %retval
}
; MIN(MAX(x, y), x) -> x
define i32 @test34(i32 %x, i32 %y) {
; CHECK-LABEL: @test34(
; CHECK-NEXT: ret i32 [[X:%.*]]
;
%cmp = icmp sgt i32 %x, %y
%cond = select i1 %cmp, i32 %x, i32 %y
%cmp5 = icmp sgt i32 %cond, %x
%retval = select i1 %cmp5, i32 %x, i32 %cond
ret i32 %retval
}
define i1 @test38(i1 %cond) {
; CHECK-LABEL: @test38(
; CHECK-NEXT: ret i1 false
;
%zero = alloca i32
%one = alloca i32
%ptr = select i1 %cond, i32* %zero, i32* %one
%isnull = icmp eq i32* %ptr, null
ret i1 %isnull
}
define i1 @test39(i1 %cond, double %x) {
; CHECK-LABEL: @test39(
; CHECK-NEXT: ret i1 true
;
%s = select i1 %cond, double %x, double 0x7FF0000000000000 ; RHS = +infty
%cmp = fcmp ule double %x, %s
ret i1 %cmp
}
define i1 @test40(i1 %cond) {
; CHECK-LABEL: @test40(
; CHECK-NEXT: ret i1 false
;
%a = alloca i32
%b = alloca i32
%c = alloca i32
%s = select i1 %cond, i32* %a, i32* %b
%r = icmp eq i32* %s, %c
ret i1 %r
}
define i32 @test41(i1 %cond, i32 %x, i32 %y) {
; CHECK-LABEL: @test41(
; CHECK-NEXT: [[R:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i32 [[R]]
;
%z = and i32 %x, %y
%s = select i1 %cond, i32 %y, i32 %z
%r = and i32 %x, %s
ret i32 %r
}
define i32 @test42(i32 %x, i32 %y) {
; CHECK-LABEL: @test42(
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[B:%.*]] = sext i1 [[COND]] to i32
; CHECK-NEXT: [[C:%.*]] = add i32 [[B]], [[Y:%.*]]
; CHECK-NEXT: ret i32 [[C]]
;
%b = add i32 %y, -1
%cond = icmp eq i32 %x, 0
%c = select i1 %cond, i32 %b, i32 %y
ret i32 %c
}
define <2 x i32> @test42vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @test42vec(
; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X:%.*]], zeroinitializer
; CHECK-NEXT: [[B:%.*]] = sext <2 x i1> [[COND]] to <2 x i32>
; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], [[Y:%.*]]
; CHECK-NEXT: ret <2 x i32> [[C]]
;
%b = add <2 x i32> %y, <i32 -1, i32 -1>
%cond = icmp eq <2 x i32> %x, zeroinitializer
%c = select <2 x i1> %cond, <2 x i32> %b, <2 x i32> %y
ret <2 x i32> %c
}
; PR8994
; This select instruction can't be eliminated because trying to do so would
; change the number of vector elements. This used to assert.
define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
; CHECK-LABEL: @test51(
; CHECK-NEXT: [[SELECT:%.*]] = select <3 x i1> [[ICMP:%.*]], <3 x i16> zeroinitializer, <3 x i16> [[TMP:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <3 x i16> [[SELECT]] to i48
; CHECK-NEXT: ret i48 [[TMP2]]
;
%select = select <3 x i1> %icmp, <3 x i16> zeroinitializer, <3 x i16> %tmp
%tmp2 = bitcast <3 x i16> %select to i48
ret i48 %tmp2
}
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
; Allow select promotion even if there are multiple uses of bitcasted ops.
; Hoisting the selects allows later pattern matching to see that these are min/max ops.
define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
; CHECK-LABEL: @min_max_bitcast(
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[A]], <4 x float> [[B]]
; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> [[B]], <4 x float> [[A]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* [[PTR1:%.*]] to <4 x float>*
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* [[PTR2:%.*]] to <4 x float>*
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
; CHECK-NEXT: store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
; CHECK-NEXT: ret void
;
%cmp = fcmp olt <4 x float> %a, %b
%bc1 = bitcast <4 x float> %a to <4 x i32>
%bc2 = bitcast <4 x float> %b to <4 x i32>
%sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
%sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
store <4 x i32> %sel1, <4 x i32>* %ptr1
store <4 x i32> %sel2, <4 x i32>* %ptr2
ret void
}
; To avoid potential backend problems, we don't do the same transform for other casts.
define void @truncs_before_selects(<4 x float> %f1, <4 x float> %f2, <4 x i64> %a, <4 x i64> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
; CHECK-LABEL: @truncs_before_selects(
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: [[BC1:%.*]] = trunc <4 x i64> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[BC2:%.*]] = trunc <4 x i64> [[B:%.*]] to <4 x i32>
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
; CHECK-NEXT: [[SEL1:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC1]], <4 x i32> [[BC2]]
; CHECK-NEXT: [[SEL2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC2]], <4 x i32> [[BC1]]
; CHECK-NEXT: store <4 x i32> [[SEL1]], <4 x i32>* [[PTR1:%.*]], align 16
; CHECK-NEXT: store <4 x i32> [[SEL2]], <4 x i32>* [[PTR2:%.*]], align 16
[InstCombine] allow more than one use for vector bitcast folding with selects The motivating example for this transform is similar to D20774 where bitcasts interfere with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to produce min and max ops: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %bc1 = bitcast <4 x float> %a to <4 x i32> %bc2 = bitcast <4 x float> %b to <4 x i32> %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2 %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1 %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>* store <4 x i32> %sel1, <4 x i32>* %bc3 %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>* store <4 x i32> %sel2, <4 x i32>* %bc4 ret void } With this patch, we move the selects up to use the input args which allows getting rid of all of the bitcasts: define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) { %cmp = fcmp olt <4 x float> %a, %b %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16 store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16 ret void } The asm for x86 SSE then improves from: movaps %xmm0, %xmm2 cmpltps %xmm1, %xmm2 movaps %xmm2, %xmm3 andnps %xmm1, %xmm3 movaps %xmm2, %xmm4 andnps %xmm0, %xmm4 andps %xmm2, %xmm0 orps %xmm3, %xmm0 andps %xmm1, %xmm2 orps %xmm4, %xmm2 movaps %xmm0, (%rdi) movaps %xmm2, (%rsi) To: movaps %xmm0, %xmm2 minps %xmm1, %xmm2 maxps %xmm0, %xmm1 movaps %xmm2, (%rdi) movaps %xmm1, (%rsi) The TODO comments show that we're limiting this transform only to vectors and only to bitcasts because we need to improve other transforms or risk creating worse codegen. Differential Revision: http://reviews.llvm.org/D21190 llvm-svn: 273011
2016-06-18 00:46:50 +08:00
; CHECK-NEXT: ret void
;
%cmp = fcmp olt <4 x float> %f1, %f2
%bc1 = trunc <4 x i64> %a to <4 x i32>
%bc2 = trunc <4 x i64> %b to <4 x i32>
%sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
%sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
store <4 x i32> %sel1, <4 x i32>* %ptr1, align 16
store <4 x i32> %sel2, <4 x i32>* %ptr2, align 16
ret void
}
; PR8575
define i32 @test52(i32 %n, i32 %m) {
; CHECK-LABEL: @test52(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], [[M:%.*]]
; CHECK-NEXT: [[STOREMERGE:%.*]] = select i1 [[CMP]], i32 1, i32 6
; CHECK-NEXT: ret i32 [[STOREMERGE]]
;
%cmp = icmp sgt i32 %n, %m
%. = select i1 %cmp, i32 1, i32 3
%add = add nsw i32 %., 3
%storemerge = select i1 %cmp, i32 %., i32 %add
ret i32 %storemerge
}
; PR9454
define i32 @test53(i32 %x) {
; CHECK-LABEL: @test53(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]]
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2, i32 1
; CHECK-NEXT: ret i32 [[SEL]]
;
%and = and i32 %x, 2
%cmp = icmp eq i32 %and, %x
%sel = select i1 %cmp, i32 2, i32 1
ret i32 %sel
}
define i32 @test54(i32 %X, i32 %Y) {
; CHECK-LABEL: @test54(
; CHECK-NEXT: [[B:%.*]] = icmp ne i32 [[X:%.*]], 0
; CHECK-NEXT: [[C:%.*]] = zext i1 [[B]] to i32
; CHECK-NEXT: ret i32 [[C]]
;
%A = ashr exact i32 %X, %Y
%B = icmp eq i32 %A, 0
%C = select i1 %B, i32 %A, i32 1
ret i32 %C
}
define i1 @test55(i1 %X, i32 %Y, i32 %Z) {
; CHECK-LABEL: @test55(
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[Y:%.*]], 0
; CHECK-NEXT: ret i1 [[C]]
;
%A = ashr exact i32 %Y, %Z
%B = select i1 %X, i32 %Y, i32 %A
%C = icmp eq i32 %B, 0
ret i1 %C
}
define i32 @test56(i16 %x) {
; CHECK-LABEL: @test56(
; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT: ret i32 [[CONV]]
;
%tobool = icmp eq i16 %x, 0
%conv = zext i16 %x to i32
%cond = select i1 %tobool, i32 0, i32 %conv
ret i32 %cond
}
define i32 @test57(i32 %x, i32 %y) {
; CHECK-LABEL: @test57(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i32 [[AND]]
;
%and = and i32 %x, %y
%tobool = icmp eq i32 %x, 0
%.and = select i1 %tobool, i32 0, i32 %and
ret i32 %.and
}
define i32 @test58(i16 %x) {
; CHECK-LABEL: @test58(
; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[X:%.*]] to i32
; CHECK-NEXT: ret i32 [[CONV]]
;
%tobool = icmp ne i16 %x, 1
%conv = zext i16 %x to i32
%cond = select i1 %tobool, i32 %conv, i32 1
ret i32 %cond
}
define i32 @test59(i32 %x, i32 %y) {
; CHECK-LABEL: @test59(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i32 [[AND]]
;
%and = and i32 %x, %y
%tobool = icmp ne i32 %x, %y
%.and = select i1 %tobool, i32 %and, i32 %y
ret i32 %.and
}
define i1 @test60(i32 %x, i1* %y) {
; CHECK-LABEL: @test60(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
; CHECK-NEXT: [[LOAD:%.*]] = load i1, i1* [[Y:%.*]], align 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[X]], 1
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 [[LOAD]], i1 [[CMP1]]
; CHECK-NEXT: ret i1 [[SEL]]
;
%cmp = icmp eq i32 %x, 0
%load = load i1, i1* %y, align 1
%cmp1 = icmp slt i32 %x, 1
%sel = select i1 %cmp, i1 %load, i1 %cmp1
ret i1 %sel
}
@glbl = constant i32 10
define i32 @test61(i32* %ptr) {
; CHECK-LABEL: @test61(
; CHECK-NEXT: ret i32 10
;
%A = load i32, i32* %ptr
%B = icmp eq i32* %ptr, @glbl
%C = select i1 %B, i32 %A, i32 10
ret i32 %C
}
; PR14131
define void @test64(i32 %p, i16 %b) noreturn {
; CHECK-LABEL: @test64(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 undef, label [[LOR_RHS:%.*]], label [[LOR_END:%.*]]
; CHECK: lor.rhs:
; CHECK-NEXT: br label [[LOR_END]]
; CHECK: lor.end:
; CHECK-NEXT: br i1 true, label [[COND_END17:%.*]], label [[COND_FALSE16:%.*]]
; CHECK: cond.false16:
; CHECK-NEXT: br label [[COND_END17]]
; CHECK: cond.end17:
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: br label [[WHILE_BODY]]
;
entry:
%p.addr.0.insert.mask = and i32 %p, -65536
%conv2 = and i32 %p, 65535
br i1 undef, label %lor.rhs, label %lor.end
lor.rhs:
%p.addr.0.extract.trunc = trunc i32 %p.addr.0.insert.mask to i16
%phitmp = zext i16 %p.addr.0.extract.trunc to i32
br label %lor.end
lor.end:
%t.1 = phi i32 [ 0, %entry ], [ %phitmp, %lor.rhs ]
%conv6 = zext i16 %b to i32
%div = udiv i32 %conv6, %t.1
%tobool8 = icmp eq i32 %div, 0
%cmp = icmp eq i32 %t.1, 0
%cmp12 = icmp ult i32 %conv2, 2
%cmp.sink = select i1 %tobool8, i1 %cmp12, i1 %cmp
br i1 %cmp.sink, label %cond.end17, label %cond.false16
cond.false16:
br label %cond.end17
cond.end17:
br label %while.body
while.body:
br label %while.body
}
@under_aligned = external global i32, align 1
; The load here must not be speculated around the select. One side of the
; select is trivially dereferenceable but may have a lower alignment than the
; load does.
define i32 @test76(i1 %flag, i32* %x) {
; CHECK-LABEL: @test76(
; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* @under_aligned, i32* [[X]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
store i32 0, i32* %x
%p = select i1 %flag, i32* @under_aligned, i32* %x
%v = load i32, i32* %p
ret i32 %v
}
declare void @scribble_on_i32(i32*)
; The load here must not be speculated around the select. One side of the
; select is trivially dereferenceable but may have a lower alignment than the
; load does.
define i32 @test77(i1 %flag, i32* %x) {
; CHECK-LABEL: @test77(
; CHECK-NEXT: [[UNDER_ALIGNED:%.*]] = alloca i32, align 1
; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[UNDER_ALIGNED]])
; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[UNDER_ALIGNED]], i32* [[X]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%under_aligned = alloca i32, align 1
call void @scribble_on_i32(i32* %under_aligned)
store i32 0, i32* %x
%p = select i1 %flag, i32* %under_aligned, i32* %x
%v = load i32, i32* %p
ret i32 %v
}
define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
; CHECK-LABEL: @test78(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
; CHECK-NEXT: store i32 0, i32* [[Y:%.*]], align 4
; CHECK-NEXT: store i32 42, i32* [[Z:%.*]], align 4
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X]], align 4
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, i32* [[Y]], align 4
; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i32 [[X_VAL]], i32 [[Y_VAL]]
; CHECK-NEXT: ret i32 [[V]]
;
entry:
store i32 0, i32* %x
store i32 0, i32* %y
; Block forwarding by storing to %z which could alias either %x or %y.
store i32 42, i32* %z
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32, i32* %p
ret i32 %v
}
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) %x, i32* dereferenceable(4) %y, i32* %z) {
; CHECK-LABEL: @test78_deref(
; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X:%.*]], align 4
; CHECK-NEXT: [[Y_VAL:%.*]] = load i32, i32* [[Y:%.*]], align 4
; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i32 [[X_VAL]], i32 [[Y_VAL]]
; CHECK-NEXT: ret i32 [[V]]
;
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32, i32* %p
ret i32 %v
}
; The same as @test78 but we can't speculate the load because it can trap
; if under-aligned.
define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) {
; CHECK-LABEL: @test78_neg(
; CHECK-NEXT: store i32 0, i32* [[X:%.*]], align 4
; CHECK-NEXT: store i32 0, i32* [[Y:%.*]], align 4
; CHECK-NEXT: store i32 42, i32* [[Z:%.*]], align 4
; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[X]], i32* [[Y]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 16
; CHECK-NEXT: ret i32 [[V]]
;
store i32 0, i32* %x
store i32 0, i32* %y
; Block forwarding by storing to %z which could alias either %x or %y.
store i32 42, i32* %z
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32, i32* %p, align 16
ret i32 %v
}
; The same as @test78_deref but we can't speculate the load because
; one of the arguments is not sufficiently dereferenceable.
define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) {
; CHECK-LABEL: @test78_deref_neg(
; CHECK-NEXT: [[P:%.*]] = select i1 [[FLAG:%.*]], i32* [[X:%.*]], i32* [[Y:%.*]]
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 4
; CHECK-NEXT: ret i32 [[V]]
;
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32, i32* %p
ret i32 %v
}
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
; CHECK-LABEL: @test79(
; CHECK-NEXT: [[X1:%.*]] = bitcast float* [[X:%.*]] to i32*
; CHECK-NEXT: [[Y1:%.*]] = bitcast i32* [[Y:%.*]] to float*
; CHECK-NEXT: store i32 0, i32* [[X1]], align 4
; CHECK-NEXT: store i32 0, i32* [[Y]], align 4
; CHECK-NEXT: store i32 42, i32* [[Z:%.*]], align 4
; CHECK-NEXT: [[X_VAL:%.*]] = load float, float* [[X]], align 4
; CHECK-NEXT: [[Y1_VAL:%.*]] = load float, float* [[Y1]], align 4
; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], float [[X_VAL]], float [[Y1_VAL]]
; CHECK-NEXT: ret float [[V]]
;
%x1 = bitcast float* %x to i32*
%y1 = bitcast i32* %y to float*
store i32 0, i32* %x1
store i32 0, i32* %y
; Block forwarding by storing to %z which could alias either %x or %y.
store i32 42, i32* %z
%p = select i1 %flag, float* %x, float* %y1
%v = load float, float* %p
ret float %v
}
; Test that when we speculate the loads around the select they fold throug
; load->load folding and load->store folding.
define i32 @test80(i1 %flag) {
; CHECK-LABEL: @test80(
; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[X]])
; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[Y]])
; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[X]], align 4
; CHECK-NEXT: store i32 [[TMP]], i32* [[Y]], align 4
; CHECK-NEXT: ret i32 [[TMP]]
;
%x = alloca i32
%y = alloca i32
call void @scribble_on_i32(i32* %x)
call void @scribble_on_i32(i32* %y)
%tmp = load i32, i32* %x
store i32 %tmp, i32* %y
%p = select i1 %flag, i32* %x, i32* %y
%v = load i32, i32* %p
ret i32 %v
}
; Test that we can speculate the load around the select even though they use
; differently typed pointers.
define float @test81(i1 %flag) {
; CHECK-LABEL: @test81(
; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[X]])
; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[Y]])
; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[X]], align 4
; CHECK-NEXT: store i32 [[TMP]], i32* [[Y]], align 4
; CHECK-NEXT: [[V:%.*]] = bitcast i32 [[TMP]] to float
; CHECK-NEXT: ret float [[V]]
;
%x = alloca float
%y = alloca i32
%x1 = bitcast float* %x to i32*
%y1 = bitcast i32* %y to float*
call void @scribble_on_i32(i32* %x1)
call void @scribble_on_i32(i32* %y)
%tmp = load i32, i32* %x1
store i32 %tmp, i32* %y
%p = select i1 %flag, float* %x, float* %y1
%v = load float, float* %p
ret float %v
}
; Test that we can speculate the load around the select even though they use
; differently typed pointers.
define i32 @test82(i1 %flag) {
; CHECK-LABEL: @test82(
; CHECK-NEXT: [[X:%.*]] = alloca float, align 4
; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[X1:%.*]] = bitcast float* [[X]] to i32*
; CHECK-NEXT: [[Y1:%.*]] = bitcast i32* [[Y]] to float*
; CHECK-NEXT: call void @scribble_on_i32(i32* [[X1]])
; CHECK-NEXT: call void @scribble_on_i32(i32* nonnull [[Y]])
; CHECK-NEXT: [[TMP:%.*]] = load float, float* [[X]], align 4
; CHECK-NEXT: store float [[TMP]], float* [[Y1]], align 4
; CHECK-NEXT: [[V:%.*]] = bitcast float [[TMP]] to i32
; CHECK-NEXT: ret i32 [[V]]
;
%x = alloca float
%y = alloca i32
%x1 = bitcast float* %x to i32*
%y1 = bitcast i32* %y to float*
call void @scribble_on_i32(i32* %x1)
call void @scribble_on_i32(i32* %y)
%tmp = load float, float* %x
store float %tmp, float* %y1
%p = select i1 %flag, i32* %x1, i32* %y
%v = load i32, i32* %p
ret i32 %v
}
declare void @scribble_on_i64(i64*)
declare void @scribble_on_i128(i128*)
; Test that we can speculate the load around the select even though they use
; differently typed pointers and requires inttoptr casts.
define i8* @test83(i1 %flag) {
; CHECK-LABEL: @test83(
; CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[Y:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i8** [[Y]] to i64*
; CHECK-NEXT: [[X1:%.*]] = bitcast i8** [[X]] to i64*
; CHECK-NEXT: call void @scribble_on_i64(i64* [[X1]])
; CHECK-NEXT: call void @scribble_on_i64(i64* [[TMPCAST]])
; CHECK-NEXT: [[TMP:%.*]] = load i64, i64* [[X1]], align 8
; CHECK-NEXT: store i64 [[TMP]], i64* [[TMPCAST]], align 8
; CHECK-NEXT: [[V:%.*]] = inttoptr i64 [[TMP]] to i8*
; CHECK-NEXT: ret i8* [[V]]
;
%x = alloca i8*
%y = alloca i64
%x1 = bitcast i8** %x to i64*
%y1 = bitcast i64* %y to i8**
call void @scribble_on_i64(i64* %x1)
call void @scribble_on_i64(i64* %y)
%tmp = load i64, i64* %x1
store i64 %tmp, i64* %y
%p = select i1 %flag, i8** %x, i8** %y1
%v = load i8*, i8** %p
ret i8* %v
}
; Test that we can speculate the load around the select even though they use
; differently typed pointers and requires a ptrtoint cast.
define i64 @test84(i1 %flag) {
; CHECK-LABEL: @test84(
; CHECK-NEXT: [[X:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[Y:%.*]] = alloca i8*, align 8
; CHECK-NEXT: [[TMPCAST:%.*]] = bitcast i8** [[Y]] to i64*
; CHECK-NEXT: [[X1:%.*]] = bitcast i8** [[X]] to i64*
; CHECK-NEXT: call void @scribble_on_i64(i64* [[X1]])
; CHECK-NEXT: call void @scribble_on_i64(i64* [[TMPCAST]])
; CHECK-NEXT: [[TMP:%.*]] = load i8*, i8** [[X]], align 8
; CHECK-NEXT: store i8* [[TMP]], i8** [[Y]], align 8
; CHECK-NEXT: [[V:%.*]] = ptrtoint i8* [[TMP]] to i64
; CHECK-NEXT: ret i64 [[V]]
;
%x = alloca i8*
%y = alloca i64
%x1 = bitcast i8** %x to i64*
%y1 = bitcast i64* %y to i8**
call void @scribble_on_i64(i64* %x1)
call void @scribble_on_i64(i64* %y)
%tmp = load i8*, i8** %x
store i8* %tmp, i8** %y1
%p = select i1 %flag, i64* %x1, i64* %y
%v = load i64, i64* %p
ret i64 %v
}
; Test that we can't speculate the load around the select. The load of the
; pointer doesn't load all of the stored integer bits. We could fix this, but it
; would require endianness checks and other nastiness.
define i8* @test85(i1 %flag) {
; CHECK-LABEL: @test85(
; CHECK-NEXT: [[X1:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[Y:%.*]] = alloca i128, align 8
; CHECK-NEXT: [[X1_SUB:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[X1]], i64 0, i64 0
; CHECK-NEXT: [[X2:%.*]] = bitcast [2 x i8*]* [[X1]] to i128*
; CHECK-NEXT: [[Y1:%.*]] = bitcast i128* [[Y]] to i8**
; CHECK-NEXT: call void @scribble_on_i128(i128* [[X2]])
; CHECK-NEXT: call void @scribble_on_i128(i128* nonnull [[Y]])
; CHECK-NEXT: [[TMP:%.*]] = load i128, i128* [[X2]], align 8
; CHECK-NEXT: store i128 [[TMP]], i128* [[Y]], align 8
; CHECK-NEXT: [[X1_SUB_VAL:%.*]] = load i8*, i8** [[X1_SUB]], align 8
; CHECK-NEXT: [[Y1_VAL:%.*]] = load i8*, i8** [[Y1]], align 8
; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i8* [[X1_SUB_VAL]], i8* [[Y1_VAL]]
; CHECK-NEXT: ret i8* [[V]]
;
%x = alloca [2 x i8*]
%y = alloca i128
%x1 = bitcast [2 x i8*]* %x to i8**
%x2 = bitcast i8** %x1 to i128*
%y1 = bitcast i128* %y to i8**
call void @scribble_on_i128(i128* %x2)
call void @scribble_on_i128(i128* %y)
%tmp = load i128, i128* %x2
store i128 %tmp, i128* %y
%p = select i1 %flag, i8** %x1, i8** %y1
%v = load i8*, i8** %p
ret i8* %v
}
; Test that we can't speculate the load around the select when the integer size
; is larger than the pointer size. The store of the pointer doesn't store to all
; the bits of the integer.
define i128 @test86(i1 %flag) {
; CHECK-LABEL: @test86(
; CHECK-NEXT: [[X1:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[Y:%.*]] = alloca i128, align 8
; CHECK-NEXT: [[X1_SUB:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[X1]], i64 0, i64 0
; CHECK-NEXT: [[X2:%.*]] = bitcast [2 x i8*]* [[X1]] to i128*
; CHECK-NEXT: [[Y1:%.*]] = bitcast i128* [[Y]] to i8**
; CHECK-NEXT: call void @scribble_on_i128(i128* [[X2]])
; CHECK-NEXT: call void @scribble_on_i128(i128* nonnull [[Y]])
; CHECK-NEXT: [[TMP:%.*]] = load i8*, i8** [[X1_SUB]], align 8
; CHECK-NEXT: store i8* [[TMP]], i8** [[Y1]], align 8
; CHECK-NEXT: [[X2_VAL:%.*]] = load i128, i128* [[X2]], align 8
; CHECK-NEXT: [[Y_VAL:%.*]] = load i128, i128* [[Y]], align 8
; CHECK-NEXT: [[V:%.*]] = select i1 [[FLAG:%.*]], i128 [[X2_VAL]], i128 [[Y_VAL]]
; CHECK-NEXT: ret i128 [[V]]
;
%x = alloca [2 x i8*]
%y = alloca i128
%x1 = bitcast [2 x i8*]* %x to i8**
%x2 = bitcast i8** %x1 to i128*
%y1 = bitcast i128* %y to i8**
call void @scribble_on_i128(i128* %x2)
call void @scribble_on_i128(i128* %y)
%tmp = load i8*, i8** %x1
store i8* %tmp, i8** %y1
%p = select i1 %flag, i128* %x2, i128* %y
%v = load i128, i128* %p
ret i128 %v
}
define i32 @test_select_select0(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
; CHECK-LABEL: @test_select_select0(
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[A:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 [[R1:%.*]], i32 [[R0:%.*]]
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A]], [[V2:%.*]]
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[S0]], i32 [[R1]]
; CHECK-NEXT: ret i32 [[S1]]
;
%c0 = icmp sge i32 %a, %v1
%s0 = select i1 %c0, i32 %r0, i32 %r1
%c1 = icmp slt i32 %a, %v2
%s1 = select i1 %c1, i32 %s0, i32 %r1
ret i32 %s1
}
define i32 @test_select_select1(i32 %a, i32 %r0, i32 %r1, i32 %v1, i32 %v2) {
; CHECK-LABEL: @test_select_select1(
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[A:%.*]], [[V1:%.*]]
; CHECK-NEXT: [[S0:%.*]] = select i1 [[C0]], i32 [[R1:%.*]], i32 [[R0:%.*]]
; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[A]], [[V2:%.*]]
; CHECK-NEXT: [[S1:%.*]] = select i1 [[C1]], i32 [[R0]], i32 [[S0]]
; CHECK-NEXT: ret i32 [[S1]]
;
%c0 = icmp sge i32 %a, %v1
%s0 = select i1 %c0, i32 %r0, i32 %r1
%c1 = icmp slt i32 %a, %v2
%s1 = select i1 %c1, i32 %r0, i32 %s0
ret i32 %s1
}
define i32 @PR23757(i32 %x) {
; CHECK-LABEL: @PR23757(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 2147483647
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], 1
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2147483648, i32 [[ADD]]
; CHECK-NEXT: ret i32 [[SEL]]
;
%cmp = icmp eq i32 %x, 2147483647
%add = add nsw i32 %x, 1
%sel = select i1 %cmp, i32 -2147483648, i32 %add
ret i32 %sel
}
; max(max(~a, -1), -1) --> ~min(a, 0)
define i32 @PR27137(i32 %a) {
; CHECK-LABEL: @PR27137(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[A:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 0
; CHECK-NEXT: [[S1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: ret i32 [[S1]]
;
%not_a = xor i32 %a, -1
%c0 = icmp slt i32 %a, 0
%s0 = select i1 %c0, i32 %not_a, i32 -1
%c1 = icmp sgt i32 %s0, -1
%s1 = select i1 %c1, i32 %s0, i32 -1
ret i32 %s1
}
define i32 @select_icmp_slt0_xor(i32 %x) {
; CHECK-LABEL: @select_icmp_slt0_xor(
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], -2147483648
; CHECK-NEXT: ret i32 [[TMP1]]
;
%cmp = icmp slt i32 %x, zeroinitializer
%xor = xor i32 %x, 2147483648
%x.xor = select i1 %cmp, i32 %x, i32 %xor
ret i32 %x.xor
}
define <2 x i32> @select_icmp_slt0_xor_vec(<2 x i32> %x) {
; CHECK-LABEL: @select_icmp_slt0_xor_vec(
; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%cmp = icmp slt <2 x i32> %x, zeroinitializer
%xor = xor <2 x i32> %x, <i32 2147483648, i32 2147483648>
%x.xor = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %xor
ret <2 x i32> %x.xor
}
define <4 x i32> @canonicalize_to_shuffle(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @canonicalize_to_shuffle(
; CHECK-NEXT: [[SEL:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[SEL]]
;
%sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %sel
}
; Undef elements of the select condition may not be translated into undef elements of a shuffle mask
; because undef in a shuffle mask means we can return anything, not just one of the selected values.
; https://bugs.llvm.org/show_bug.cgi?id=32486
define <4 x i32> @undef_elts_in_condition(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @undef_elts_in_condition(
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> <i1 true, i1 undef, i1 false, i1 undef>, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT: ret <4 x i32> [[SEL]]
;
%sel = select <4 x i1> <i1 true, i1 undef, i1 false, i1 undef>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %sel
}
; Don't die or try if the condition mask is a constant expression or contains a constant expression.
@g = global i32 0
define <4 x i32> @cannot_canonicalize_to_shuffle1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cannot_canonicalize_to_shuffle1(
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> bitcast (i4 ptrtoint (i32* @g to i4) to <4 x i1>), <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT: ret <4 x i32> [[SEL]]
;
%sel = select <4 x i1> bitcast (i4 ptrtoint (i32* @g to i4) to <4 x i1>), <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %sel
}
define <4 x i32> @cannot_canonicalize_to_shuffle2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cannot_canonicalize_to_shuffle2(
; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> <i1 true, i1 undef, i1 false, i1 icmp sle (i16 ptrtoint (i32* @g to i16), i16 4)>, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT: ret <4 x i32> [[SEL]]
;
%sel = select <4 x i1> <i1 true, i1 undef, i1 false, i1 icmp sle (i16 ptrtoint (i32* @g to i16), i16 4)>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %sel
}
declare void @llvm.assume(i1)
define i8 @assume_cond_true(i1 %cond, i8 %x, i8 %y) {
; CHECK-LABEL: @assume_cond_true(
; CHECK-NEXT: call void @llvm.assume(i1 [[COND:%.*]])
; CHECK-NEXT: ret i8 [[X:%.*]]
;
call void @llvm.assume(i1 %cond)
%sel = select i1 %cond, i8 %x, i8 %y
ret i8 %sel
}
; computeKnownBitsFromAssume() understands the 'not' of an assumed condition.
define i8 @assume_cond_false(i1 %cond, i8 %x, i8 %y) {
; CHECK-LABEL: @assume_cond_false(
; CHECK-NEXT: [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true
; CHECK-NEXT: call void @llvm.assume(i1 [[NOTCOND]])
; CHECK-NEXT: ret i8 [[Y:%.*]]
;
%notcond = xor i1 %cond, true
call void @llvm.assume(i1 %notcond)
%sel = select i1 %cond, i8 %x, i8 %y
ret i8 %sel
}
; Test case to make sure we don't consider an all ones float values for converting the select into a sext.
define <4 x float> @PR33721(<4 x float> %w) {
; CHECK-LABEL: @PR33721(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[W:%.*]], zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
entry:
%0 = fcmp ole <4 x float> %w, zeroinitializer
%1 = select <4 x i1> %0, <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer
ret <4 x float> %1
}
; select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)
define i8 @test87(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @test87(
; CHECK-NEXT: [[B:%.*]] = add i8 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[B]], i8 [[Z:%.*]]
; CHECK-NEXT: ret i8 [[C]]
;
%a = select i1 %cond, i8 %x, i8 %y
%b = add i8 %a, %w
%c = select i1 %cond, i8 %b, i8 %z
ret i8 %c
}
; select(C, binop(select(C, X, Y), W), Z) -> select(C, Z, binop(Y, W))
define i8 @test88(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @test88(
; CHECK-NEXT: [[B:%.*]] = sub i8 [[Y:%.*]], [[W:%.*]]
; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[Z:%.*]], i8 [[B]]
; CHECK-NEXT: ret i8 [[C]]
;
%a = select i1 %cond, i8 %x, i8 %y
%b = sub i8 %a, %w
%c = select i1 %cond, i8 %z, i8 %b
ret i8 %c
}
; select(C, Z, binop(W, select(C, X, Y))) -> select(C, binop(X, W), Z)
define i8 @test89(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @test89(
; CHECK-NEXT: [[B:%.*]] = and i8 [[X:%.*]], [[W:%.*]]
; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[B]], i8 [[Z:%.*]]
; CHECK-NEXT: ret i8 [[C]]
;
%a = select i1 %cond, i8 %x, i8 %y
%b = and i8 %w, %a
%c = select i1 %cond, i8 %b, i8 %z
ret i8 %c
}
; select(C, Z, binop(W, select(C, X, Y))) -> select(C, Z, binop(W, Y))
define i8 @test90(i1 %cond, i8 %w, i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @test90(
; CHECK-NEXT: [[B:%.*]] = or i8 [[Y:%.*]], [[W:%.*]]
; CHECK-NEXT: [[C:%.*]] = select i1 [[COND:%.*]], i8 [[Z:%.*]], i8 [[B]]
; CHECK-NEXT: ret i8 [[C]]
;
%a = select i1 %cond, i8 %x, i8 %y
%b = or i8 %w, %a
%c = select i1 %cond, i8 %z, i8 %b
ret i8 %c
}