2016-05-28 23:44:28 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
2010-02-06 03:53:02 +08:00
|
|
|
; RUN: opt < %s -instcombine -S | FileCheck %s
|
|
|
|
|
2008-10-29 06:38:57 +08:00
|
|
|
|
2016-05-28 23:44:28 +08:00
|
|
|
; Mask-based select built from a sign-extended compare:
;   (%c & sext(%a < %b)) | (%d & ~sext(%a < %b))
; The CHECK lines expect this to become a single select on the compare.
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: [[E:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[J]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %g, %i
  ret i32 %j
}
|
2016-05-28 23:44:28 +08:00
|
|
|
|
|
|
|
; Same sext-mask and/or pattern as a select, but with the operands of the
; final 'or' commuted (%i first, then %g).
; The CHECK lines expect the same single select on the compare.
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: [[E:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[J]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %i, %g
  ret i32 %j
}
|
2010-02-06 03:53:02 +08:00
|
|
|
|
2016-05-28 23:44:28 +08:00
|
|
|
; The all-ones/zero mask comes from 'select i1 %t0, i32 -1, i32 0' rather
; than a sext; the inverted mask is formed with 'xor ..., -1'.
; The CHECK lines expect a single select of %c/%d on the compare.
define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @goo(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %not = xor i32 %iftmp.0.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}
|
2016-05-28 23:44:28 +08:00
|
|
|
|
|
|
|
; Both masks come from selects on the same compare: (-1, 0) for %c and the
; swapped constants (0, -1) for %d, so no explicit 'xor' inverts the mask.
; The CHECK lines expect a single select of %c/%d on the compare.
define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @poo(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %iftmp = select i1 %t0, i32 0, i32 -1
  %t2 = and i32 %iftmp, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}
|
|
|
|
|
2017-05-16 07:59:28 +08:00
|
|
|
; PR32791 - https://bugs.llvm.org/show_bug.cgi?id=32791
|
2017-06-28 01:53:22 +08:00
|
|
|
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
|
|
|
|
|
2017-05-16 07:59:28 +08:00
|
|
|
; Two selects with zero false arms, guarded by inverse predicates (slt / sge)
; on the same operands, are or'd together.
; The CHECK lines expect the 'sge' compare rewritten as 'slt' with its select
; arms swapped; the two selects and the 'or' remain.
define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 %d
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
  %cmp1 = icmp slt i32 %a, %b
  %sel1 = select i1 %cmp1, i32 %c, i32 0
  %cmp2 = icmp sge i32 %a, %b
  %sel2 = select i1 %cmp2, i32 %d, i32 0
  %or = or i32 %sel1, %sel2
  ret i32 %or
}
|
|
|
|
|
2017-06-28 01:53:22 +08:00
|
|
|
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
|
|
|
|
|
2017-05-16 07:59:28 +08:00
|
|
|
; Variant of the inverse-predicate (slt / sge) test with the zero constant in
; the true arm of each select instead of the false arm.
; The CHECK lines expect the 'sge' compare rewritten as 'slt' with its select
; arms swapped; the two selects and the 'or' remain.
define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
  %cmp1 = icmp slt i32 %a, %b
  %sel1 = select i1 %cmp1, i32 0, i32 %c
  %cmp2 = icmp sge i32 %a, %b
  %sel2 = select i1 %cmp2, i32 0, i32 %d
  %or = or i32 %sel1, %sel2
  ret i32 %or
}
|
|
|
|
|
2017-06-28 01:53:22 +08:00
|
|
|
; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?
|
|
|
|
|
2017-05-16 07:59:28 +08:00
|
|
|
; Floating-point version of the inverse-predicate test (olt / uge).
; The CHECK lines expect the output to match the input: neither compare is
; rewritten and the two selects and the 'or' remain.
define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_fcmp_preds(
; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 %c, i32 0
; CHECK-NEXT: [[CMP2:%.*]] = fcmp uge float %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 %d, i32 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[OR]]
;
  %cmp1 = fcmp olt float %a, %b
  %sel1 = select i1 %cmp1, i32 %c, i32 0
  %cmp2 = fcmp uge float %a, %b
  %sel2 = select i1 %cmp2, i32 %d, i32 0
  %or = or i32 %sel1, %sel2
  ret i32 %or
}
|
|
|
|
|
2017-06-28 01:53:22 +08:00
|
|
|
; The 1st compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
|
|
|
|
|
2017-05-16 07:59:28 +08:00
|
|
|
; Vector version of the inverse-predicate test, using ne / eq.
; The CHECK lines expect the first compare ('ne') rewritten as 'eq' with its
; select arms swapped; the second compare/select are unchanged and the 'or'
; remains.
define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i32> zeroinitializer, <2 x i32> %c
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> %a, %b
; CHECK-NEXT: [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> %d, <2 x i32> zeroinitializer
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret <2 x i32> [[OR]]
;
  %cmp1 = icmp ne <2 x i32> %a, %b
  %sel1 = select <2 x i1> %cmp1, <2 x i32> %c, <2 x i32> <i32 0, i32 0>
  %cmp2 = icmp eq <2 x i32> %a, %b
  %sel2 = select <2 x i1> %cmp2, <2 x i32> %d, <2 x i32> <i32 0, i32 0>
  %or = or <2 x i32> %sel1, %sel2
  ret <2 x i32> %or
}
|
|
|
|
|
2016-05-28 23:44:28 +08:00
|
|
|
; Mask-and/or pattern where the mask is 'select i1 %t0, i32 -1, i32 0'
; (named %iftmp.1.0) and the inverted mask is formed with 'xor ..., -1'.
; The CHECK lines expect a single select of %c/%d on the compare.
define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @par(
; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
; CHECK-NEXT: ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.1.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.1.0, %c
  %not = xor i32 %iftmp.1.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}
|
2016-05-29 00:10:37 +08:00
|
|
|
|
2016-06-25 02:26:02 +08:00
|
|
|
; In the following tests (8 commutation variants), verify that a bitcast doesn't get
|
|
|
|
; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly
|
2016-06-03 06:45:49 +08:00
|
|
|
; other vector code because of canonicalization to i64 elements for vectors.
|
2016-05-29 00:10:37 +08:00
|
|
|
|
2016-06-25 02:26:02 +08:00
|
|
|
; The fptosi instructions are included to avoid commutation canonicalization based on
|
|
|
|
; operator weight. Using another cast operator ensures that both operands of all logic
|
|
|
|
; ops are equally weighted, and this ensures that we're testing all commutation
|
|
|
|
; possibilities.
|
|
|
|
|
|
|
|
; Commutation variant 0: %and1 = (mask, value), %and2 = (mask, value),
; %or = (%and1, %and2). The masks are <4 x i32> sext results bitcast to
; <2 x i64>.
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap0(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}
|
|
|
|
|
|
|
|
; Commutation variant 1: %and1 = (mask, value), %and2 = (mask, value),
; %or = (%and2, %and1).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap1(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}
|
|
|
|
|
|
|
|
; Commutation variant 2: %and1 = (mask, value), %and2 = (value, mask),
; %or = (%and1, %and2).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap2(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}
|
|
|
|
|
|
|
|
; Commutation variant 3: %and1 = (mask, value), %and2 = (value, mask),
; %or = (%and2, %and1).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap3(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}
|
|
|
|
|
|
|
|
; Commutation variant 4: %and1 = (value, mask), %and2 = (mask, value),
; %or = (%and1, %and2).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap4(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}
|
|
|
|
|
2016-06-25 02:26:02 +08:00
|
|
|
; Commutation variant 5: %and1 = (value, mask), %and2 = (mask, value),
; %or = (%and2, %and1).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap5(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}
|
|
|
|
|
2016-06-25 02:26:02 +08:00
|
|
|
; Commutation variant 6: %and1 = (value, mask), %and2 = (value, mask),
; %or = (%and1, %and2).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap6(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}
|
|
|
|
|
2016-06-25 02:26:02 +08:00
|
|
|
; Commutation variant 7: %and1 = (value, mask), %and2 = (value, mask),
; %or = (%and2, %and1).
; The CHECK lines expect a <4 x i32> select on %cmp wrapped in bitcasts.
define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap7(
; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}
|
|
|
|
|
2016-07-09 05:08:16 +08:00
|
|
|
; Bitcast-mask pattern where %and2 and %bc2 each have an additional use
; (both feed %add).
; The CHECK lines expect the and/or logic to remain (no select is formed);
; the inverted mask is expressed as an xor on the <2 x i64> bitcast.
define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_multi_uses(
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> %cmp to <4 x i32>
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[BC1]], %a
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[BC2]], %b
; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
; CHECK-NEXT: ret <2 x i64> [[SUB]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %a, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %b, %bc2
  %or = or <2 x i64> %and2, %and1
  %add = add <2 x i64> %and2, %bc2
  %sub = sub <2 x i64> %or, %add
  ret <2 x i64> %sub
}
|
|
|
|
|
2016-06-27 07:44:21 +08:00
|
|
|
; Scalar i1 form of the pattern: (~%c & %a) | (%c & %b).
; The CHECK lines expect 'select i1 %c, i1 %b, i1 %a'.
define i1 @bools(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools(
; CHECK-NEXT: [[TMP1:%.*]] = select i1 %c, i1 %b, i1 %a
; CHECK-NEXT: ret i1 [[TMP1]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  ret i1 %or
}
|
|
|
|
|
2016-07-09 04:22:27 +08:00
|
|
|
; Form a select if we know we can replace 2 simple logic ops.
|
|
|
|
|
|
|
|
; %and1 has a second use (in %xor), so it must stay alive.
; The CHECK lines expect the select to be formed anyway, replacing %and2 and
; %or, while %not and %and1 remain.
define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses1(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 %c, true
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], %a
; CHECK-NEXT: [[TMP1:%.*]] = select i1 %c, i1 %b, i1 %a
; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]]
; CHECK-NEXT: ret i1 [[XOR]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  %xor = xor i1 %or, %and1
  ret i1 %xor
}
|
|
|
|
|
2016-07-09 05:08:16 +08:00
|
|
|
; Don't replace a cheap logic op with a potentially expensive select
|
2016-07-09 04:22:27 +08:00
|
|
|
; unless we can also eliminate one of the other original ops.
|
|
|
|
|
|
|
|
; Both %and1 and %and2 have a second use (in %add).
; The CHECK lines expect no select: the result is an xor of the two 'and's.
define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses2(
; CHECK-NEXT: [[NOT:%.*]] = xor i1 %c, true
; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], %a
; CHECK-NEXT: [[AND2:%.*]] = and i1 %c, %b
; CHECK-NEXT: [[ADD:%.*]] = xor i1 [[AND1]], [[AND2]]
; CHECK-NEXT: ret i1 [[ADD]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  %add = add i1 %and1, %and2
  %and3 = and i1 %or, %add
  ret i1 %and3
}
|
|
|
|
|
2016-06-27 07:44:21 +08:00
|
|
|
; <4 x i1> form of the pattern: (~%c & %a) | (%b & %c), with the 'or'
; operands written as (%and2, %and1).
; The CHECK lines expect 'select <4 x i1> %c, %b, %a'.
define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_bools(
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> %c, <4 x i1> %b, <4 x i1> %a
; CHECK-NEXT: ret <4 x i1> [[TMP1]]
;
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  %and1 = and <4 x i1> %not, %a
  %and2 = and <4 x i1> %b, %c
  %or = or <4 x i1> %and2, %and1
  ret <4 x i1> %or
}
|
|
|
|
|
|
|
|
; The masks are a <4 x i1> condition and its inverse, each bitcast to i4 and
; and'ed with scalar i4 values.
; The CHECK lines expect %a and %b bitcast to <4 x i1>, a vector select on %c,
; and a bitcast of the result back to i4.
define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_casted_bools(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 %a to <4 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 %b to <4 x i1>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %c, <4 x i1> [[TMP2]], <4 x i1> [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT: ret i4 [[TMP4]]
;
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  %bc1 = bitcast <4 x i1> %not to i4
  %bc2 = bitcast <4 x i1> %c to i4
  %and1 = and i4 %a, %bc1
  %and2 = and i4 %bc2, %b
  %or = or i4 %and1, %and2
  ret i4 %or
}
|
|
|
|
|
2016-09-17 06:16:18 +08:00
|
|
|
; Inverted 'and' constants mean this is a select which is canonicalized to a shuffle.
|
2016-06-27 07:44:21 +08:00
|
|
|
|
|
|
|
; %a and %b are masked with constant vectors that are lane-wise inverses of
; each other (lanes 0 and 3 keep %a, lanes 1 and 2 keep %b).
; The CHECK lines expect a single shufflevector with mask <0, 5, 6, 3>.
define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_sel_consts(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}
|
|
|
|
|
|
|
|
; Same constant inverse-mask pattern on an unusual type (<3 x i129>), with the
; 'or' operands written as (%and2, %and1).
; The CHECK lines expect a shufflevector of (%b, %a) with mask <3, 1, 5>,
; i.e. lanes 0 and 2 from %a and lane 1 from %b.
define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
; CHECK-LABEL: @vec_sel_consts_weird(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> %b, <3 x i129> %a, <3 x i32> <i32 3, i32 1, i32 5>
; CHECK-NEXT: ret <3 x i129> [[TMP1]]
;
  %and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
  %and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
  %or = or <3 x i129> %and2, %and1
  ret <3 x i129> %or
}
|
|
|
|
|
|
|
|
; The mask elements must be inverted for this to be a select.
|
|
|
|
|
|
|
|
; Negative test: the two constant masks are not lane-wise inverses (lane 2 is
; zero in both).
; The CHECK lines expect the and/and/or sequence to be left unchanged.
define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_not_sel_consts(
; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
; CHECK-NEXT: ret <4 x i32> [[OR]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; The inverted constants may be operands of xor instructions.

; The two and-masks are derived from the same sign-extended condition by xor
; with complementary constants (<-1, 0, 0, 0> and <0, -1, -1, -1>), so they are
; bitwise inverses of each other. The CHECK lines expect a select between %a
; and %b whose condition is %c xor'ed with <false, true, true, true>.
define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor(
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
  %mask = sext <4 x i1> %c to <4 x i32> ; each lane becomes all-ones or zero
  %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0> ; inverts lane 0 only
  %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1> ; inverts lanes 1-3 only
  %and1 = and <4 x i32> %not_mask_flip1, %a
  %and2 = and <4 x i32> %mask_flip1, %b
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Allow the transform even if the mask values have multiple uses because
; there's still a net reduction of instructions from removing the and/and/or.

; Same and/and/or pattern as an xor-derived select, but %mask_flip1 has a
; second use in the trailing add. The CHECK lines expect the and/and/or to
; still become a select, while %mask_flip1 stays live as a sext of the xor'ed
; condition and feeds the add.
define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor_multi_use(
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT: [[MASK_FLIP1:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[C]], <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[TMP3]], [[MASK_FLIP1]]
; CHECK-NEXT: ret <4 x i32> [[ADD]]
;
  %mask = sext <4 x i1> %c to <4 x i32> ; each lane becomes all-ones or zero
  %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0> ; inverts lane 0 only
  %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1> ; inverts lanes 1-3 only
  %and1 = and <4 x i32> %not_mask_flip1, %a
  %and2 = and <4 x i32> %mask_flip1, %b
  %or = or <4 x i32> %and1, %and2
  %add = add <4 x i32> %or, %mask_flip1 ; extra use of %mask_flip1 beyond the 'and'
  ret <4 x i32> %add
}
|
|
|
|
|