; llvm-project/llvm/test/Transforms/InstCombine/logical-select.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
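; The common pattern under test is a bitwise select built from a sign-extended
; condition mask:
;   or (and A, M), (and B, ~M) --> select Cond, A, B    where M = sext(Cond)
; For example, when Cond is true, M is all-ones: the first 'and' yields A, the
; second yields 0, and the 'or' returns A, exactly as the select would.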
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[E:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[J]]
;
%e = icmp slt i32 %a, %b
%f = sext i1 %e to i32
%g = and i32 %c, %f
%h = xor i32 %f, -1
%i = and i32 %d, %h
%j = or i32 %g, %i
ret i32 %j
}
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[E:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[J]]
;
%e = icmp slt i32 %a, %b
%f = sext i1 %e to i32
%g = and i32 %c, %f
%h = xor i32 %f, -1
%i = and i32 %d, %h
%j = or i32 %i, %g
ret i32 %j
}
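; In @goo the all-ones mask comes from a 'select %t0, -1, 0' instead of a sext;
; the same or(and, andn) --> select fold applies.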
define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @goo(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = icmp slt i32 %a, %b
%iftmp.0.0 = select i1 %t0, i32 -1, i32 0
%t1 = and i32 %iftmp.0.0, %c
%not = xor i32 %iftmp.0.0, -1
%t2 = and i32 %not, %d
%t3 = or i32 %t1, %t2
ret i32 %t3
}
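; In @poo the inverted mask is a second select ('select %t0, 0, -1') rather than
; an xor of the first mask; the fold still fires.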
define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @poo(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = icmp slt i32 %a, %b
%iftmp.0.0 = select i1 %t0, i32 -1, i32 0
%t1 = and i32 %iftmp.0.0, %c
%iftmp = select i1 %t0, i32 0, i32 -1
%t2 = and i32 %iftmp, %d
%t3 = or i32 %t1, %t2
ret i32 %t3
}
; PR32791 - https://bugs.llvm.org//show_bug.cgi?id=32791
; The 2nd compare/select pair is canonicalized, so CSE and another round of instcombine (or some other pass) will fold this.
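; Note that 'sge' is the logical inverse of 'slt', so the two selects cover
; complementary conditions; the CHECK lines show %cmp2 canonicalized to 'slt'
; with swapped select arms.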
define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
%cmp1 = icmp slt i32 %a, %b
%sel1 = select i1 %cmp1, i32 %c, i32 0
%cmp2 = icmp sge i32 %a, %b
%sel2 = select i1 %cmp2, i32 %d, i32 0
%or = or i32 %sel1, %sel2
ret i32 %or
}
; The 2nd compare/select pair is canonicalized, so CSE and another round of instcombine (or some other pass) will fold this.
define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
%cmp1 = icmp slt i32 %a, %b
%sel1 = select i1 %cmp1, i32 0, i32 %c
%cmp2 = icmp sge i32 %a, %b
%sel2 = select i1 %cmp2, i32 0, i32 %d
%or = or i32 %sel1, %sel2
ret i32 %or
}
; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?
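; ('uge' is the exact inverse of 'olt': it is true iff 'olt' is false, NaN
; included, so these selects also cover complementary conditions.)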
define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_fcmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0
; CHECK-NEXT: [[CMP2:%.*]] = fcmp uge float %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
%cmp1 = fcmp olt float %a, %b
%sel1 = select i1 %cmp1, i32 %c, i32 0
%cmp2 = fcmp uge float %a, %b
%sel2 = select i1 %cmp2, i32 %d, i32 0
%or = or i32 %sel1, %sel2
ret i32 %or
}
; The 2nd compare/select pair is canonicalized, so CSE and another round of instcombine (or some other pass) will fold this.
define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> %d, <2 x i32> zeroinitializer
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret <2 x i32> [[OR]]
;
%cmp1 = icmp ne <2 x i32> %a, %b
%sel1 = select <2 x i1> %cmp1, <2 x i32> %c, <2 x i32> <i32 0, i32 0>
%cmp2 = icmp eq <2 x i32> %a, %b
%sel2 = select <2 x i1> %cmp2, <2 x i32> %d, <2 x i32> <i32 0, i32 0>
%or = or <2 x i32> %sel1, %sel2
ret <2 x i32> %or
}
define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @par(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[T3]]
;
%t0 = icmp slt i32 %a, %b
%iftmp.1.0 = select i1 %t0, i32 -1, i32 0
%t1 = and i32 %iftmp.1.0, %c
%not = xor i32 %iftmp.1.0, -1
%t2 = and i32 %not, %d
%t3 = or i32 %t1, %t2
ret i32 %t3
}
; In the following tests (8 commutation variants), verify that a bitcast doesn't get
; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly
; other vector code because of canonicalization to i64 elements for vectors.
; The fptosi instructions are included to avoid commutation canonicalization based on
; operator weight. Using another cast operator gives both operands of each logic op
; equal weight, so all commutation possibilities are actually exercised.
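; The 8 variants cover every combination of operand order: 2 orders for the
; first 'and', 2 for the second 'and', and 2 for the 'or' (2 * 2 * 2 = 8).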
define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap0(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %bc1, %sia
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %bc2, %sib
%or = or <2 x i64> %and1, %and2
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap1(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %bc1, %sia
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %bc2, %sib
%or = or <2 x i64> %and2, %and1
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap2(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %bc1, %sia
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %sib, %bc2
%or = or <2 x i64> %and1, %and2
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap3(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %bc1, %sia
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %sib, %bc2
%or = or <2 x i64> %and2, %and1
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap4(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %sia, %bc1
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %bc2, %sib
%or = or <2 x i64> %and1, %and2
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap5(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %sia, %bc1
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %bc2, %sib
%or = or <2 x i64> %and2, %and1
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap6(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %sia, %bc1
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %sib, %bc2
%or = or <2 x i64> %and1, %and2
ret <2 x i64> %or
}
define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap7(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
%sia = fptosi <2 x double> %a to <2 x i64>
%sib = fptosi <2 x double> %b to <2 x i64>
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %sia, %bc1
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %sib, %bc2
%or = or <2 x i64> %and2, %and1
ret <2 x i64> %or
}
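; A multi-use case: %bc2 is also used by the 'add', so the or(and, andn) here
; is not folded to a select; the CHECK lines keep the original logic.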
define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_multi_uses(
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> %cmp to <4 x i32>
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[BC1]], %a
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[BC2]], %b
; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
; CHECK-NEXT: ret <2 x i64> [[SUB]]
;
%sext = sext <4 x i1> %cmp to <4 x i32>
%bc1 = bitcast <4 x i32> %sext to <2 x i64>
%and1 = and <2 x i64> %a, %bc1
%neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
%bc2 = bitcast <4 x i32> %neg to <2 x i64>
%and2 = and <2 x i64> %b, %bc2
%or = or <2 x i64> %and2, %and1
%add = add <2 x i64> %and2, %bc2
%sub = sub <2 x i64> %or, %add
ret <2 x i64> %sub
}
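; For i1 values the condition itself acts as the mask, so the not/and/and/or
; sequence on bools folds directly to 'select %c, %b, %a'.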
define i1 @bools(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools(
; CHECK-NEXT: [[TMP1:%.*]] = select i1 %c, i1 %b, i1 %a
; CHECK-NEXT: ret i1 [[TMP1]]
;
%not = xor i1 %c, -1
%and1 = and i1 %not, %a
%and2 = and i1 %c, %b
%or = or i1 %and1, %and2
ret i1 %or
}
; Form a select if we know we can replace 2 simple logic ops.
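; Here the select replaces the 'or' and makes %and2 dead, while %and1 survives
; for the final 'xor', so the transform is still a net win.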
define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses1(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 %c, true
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], %a
; CHECK-NEXT: [[TMP1:%.*]] = select i1 %c, i1 %b, i1 %a
; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]]
; CHECK-NEXT: ret i1 [[XOR]]
;
%not = xor i1 %c, -1
%and1 = and i1 %not, %a
%and2 = and i1 %c, %b
%or = or i1 %and1, %and2
%xor = xor i1 %or, %and1
ret i1 %xor
}
; Don't replace a cheap logic op with a potentially expensive select
; unless we can also eliminate one of the other original ops.
define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses2(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 %c, true
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], %a
; CHECK-NEXT: [[AND2:%.*]] = and i1 %c, %b
; CHECK-NEXT: [[ADD:%.*]] = xor i1 [[AND1]], [[AND2]]
; CHECK-NEXT: ret i1 [[ADD]]
;
%not = xor i1 %c, -1
%and1 = and i1 %not, %a
%and2 = and i1 %c, %b
%or = or i1 %and1, %and2
%add = add i1 %and1, %and2
%and3 = and i1 %or, %add
ret i1 %and3
}
define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_bools(
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> %c, <4 x i1> %b, <4 x i1> %a
; CHECK-NEXT: ret <4 x i1> [[TMP1]]
;
%not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
%and1 = and <4 x i1> %not, %a
%and2 = and <4 x i1> %b, %c
%or = or <4 x i1> %and2, %and1
ret <4 x i1> %or
}
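; Bitcasts between <4 x i1> and i4 don't block the fold: the masks are
; recovered through the casts and the select is formed in the vector domain.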
define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_casted_bools(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 %a to <4 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 %b to <4 x i1>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %c, <4 x i1> [[TMP2]], <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT: ret i4 [[TMP4]]
;
%not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
%bc1 = bitcast <4 x i1> %not to i4
%bc2 = bitcast <4 x i1> %c to i4
%and1 = and i4 %a, %bc1
%and2 = and i4 %bc2, %b
%or = or i4 %and1, %and2
ret i4 %or
}
; Inverted 'and' constants mean this is a select, which is canonicalized to a shuffle.
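; With masks <-1,0,0,-1> and <0,-1,-1,0>, lanes 0 and 3 come from %a and lanes
; 1 and 2 from %b, which is the shuffle mask <0, 5, 6, 3>.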
define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_sel_consts(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
; CHECK-LABEL: @vec_sel_consts_weird(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> %b, <3 x i129> %a, <3 x i32> <i32 3, i32 1, i32 5>
; CHECK-NEXT: ret <3 x i129> [[TMP1]]
;
%and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
%and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
%or = or <3 x i129> %and2, %and1
ret <3 x i129> %or
}
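; The odd element count and unusual i129 type don't matter: lanes 0 and 2 come
; from %a and lane 1 from %b. With %b as the first shuffle operand, that is
; mask <3, 1, 5>.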
; The mask elements must be inverted for this to be a select: lane 2 is zero in
; both constants, so lane 2 of the result is always zero, and no select of
; %a and %b produces that.
define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_not_sel_consts(
; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
; The inverted constants may be operands of xor instructions.
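; In @vec_sel_xor, and1's mask is sext(%c) ^ <0,-1,-1,-1>, which is the sext of
; %c ^ <false,true,true,true>; the fold selects %a where that flipped condition
; is true, matching the CHECK lines.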
define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor(
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%mask = sext <4 x i1> %c to <4 x i32>
%mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
%not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
%and1 = and <4 x i32> %not_mask_flip1, %a
%and2 = and <4 x i32> %mask_flip1, %b
%or = or <4 x i32> %and1, %and2
ret <4 x i32> %or
}
; Allow the transform even if the mask values have multiple uses because
; there's still a net reduction of instructions from removing the and/and/or.
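; The CHECK lines show the net win: the and/and/or chain is gone, and only the
; flipped mask needed by the 'add' is rematerialized as an xor + sext.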
define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor_multi_use(
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT: [[MASK_FLIP1:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[C]], <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[TMP3]], [[MASK_FLIP1]]
; CHECK-NEXT: ret <4 x i32> [[ADD]]
;
%mask = sext <4 x i1> %c to <4 x i32>
%mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
%not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
%and1 = and <4 x i32> %not_mask_flip1, %a
%and2 = and <4 x i32> %mask_flip1, %b
%or = or <4 x i32> %and1, %and2
%add = add <4 x i32> %or, %mask_flip1
ret <4 x i32> %add
}