forked from OSchip/llvm-project
[InstCombine] extend vector select matching for non-splat constants
In D21740, we discussed trying to make this a more general matcher. However, I didn't see a clean way to handle the regular m_Not cases and these non-splat vector patterns, so I've opted for the direct approach here. If there are other potential uses of areInverseVectorBitmasks(), we could move that helper function to a higher level. There is an open question as to which of these forms should be considered the canonical IR: %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b %shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3> Differential Revision: http://reviews.llvm.org/D22114 llvm-svn: 275289
This commit is contained in:
parent
0532c190f7
commit
c00e48a3db
|
@ -1589,10 +1589,29 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
|
|||
return LastInst;
|
||||
}
|
||||
|
||||
/// If all elements of two constant vectors are 0/-1 and inverses, return true.
|
||||
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
|
||||
unsigned NumElts = C1->getType()->getVectorNumElements();
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
Constant *EltC1 = C1->getAggregateElement(i);
|
||||
Constant *EltC2 = C2->getAggregateElement(i);
|
||||
if (!EltC1 || !EltC2)
|
||||
return false;
|
||||
|
||||
// One element must be all ones, and the other must be all zeros.
|
||||
// FIXME: Allow undef elements.
|
||||
if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
|
||||
(match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
|
||||
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
|
||||
/// B, it can be used as the condition operand of a select instruction.
|
||||
static Value *getSelectCondition(Value *A, Value *B) {
|
||||
static Value *getSelectCondition(Value *A, Value *B,
|
||||
InstCombiner::BuilderTy &Builder) {
|
||||
// If these are scalars or vectors of i1, A can be used directly.
|
||||
Type *Ty = A->getType();
|
||||
if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1))
|
||||
|
@ -1606,8 +1625,26 @@ static Value *getSelectCondition(Value *A, Value *B) {
|
|||
m_SExt(m_Not(m_Specific(Cond))))))
|
||||
return Cond;
|
||||
|
||||
// TODO: Try more matches that only apply to non-splat constant vectors.
|
||||
// All scalar (and most vector) possibilities should be handled now.
|
||||
// Try more matches that only apply to non-splat constant vectors.
|
||||
if (!Ty->isVectorTy())
|
||||
return nullptr;
|
||||
|
||||
// If both operands are constants, see if the constants are inverse bitmasks.
|
||||
Constant *AC, *BC;
|
||||
if (match(A, m_Constant(AC)) && match(B, m_Constant(BC)) &&
|
||||
areInverseVectorBitmasks(AC, BC))
|
||||
return ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
|
||||
|
||||
// If both operands are xor'd with constants using the same sexted boolean
|
||||
// operand, see if the constants are inverse bitmasks.
|
||||
if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) &&
|
||||
match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) &&
|
||||
Cond->getType()->getScalarType()->isIntegerTy(1) &&
|
||||
areInverseVectorBitmasks(AC, BC)) {
|
||||
AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty));
|
||||
return Builder.CreateXor(Cond, AC);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -1625,7 +1662,7 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
|
|||
B = SrcB;
|
||||
}
|
||||
|
||||
if (Value *Cond = getSelectCondition(A, B)) {
|
||||
if (Value *Cond = getSelectCondition(A, B, Builder)) {
|
||||
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
|
||||
// The bitcasts will either all exist or all not exist. The builder will
|
||||
// not create unnecessary casts if the types already match.
|
||||
|
|
|
@ -366,15 +366,12 @@ define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
|
|||
ret i4 %or
|
||||
}
|
||||
|
||||
; FIXME: Missed conversions to select below here.
|
||||
; Inverted 'and' constants mean this is a select.
|
||||
|
||||
define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: @vec_sel_consts(
|
||||
; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
|
||||
; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
|
||||
; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[OR]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
|
||||
;
|
||||
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
|
||||
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
|
||||
|
@ -386,10 +383,8 @@ define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
|
|||
|
||||
define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
|
||||
; CHECK-LABEL: @vec_sel_consts_weird(
|
||||
; CHECK-NEXT: [[AND1:%.*]] = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
|
||||
; CHECK-NEXT: [[AND2:%.*]] = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
|
||||
; CHECK-NEXT: [[OR:%.*]] = or <3 x i129> [[AND2]], [[AND1]]
|
||||
; CHECK-NEXT: ret <3 x i129> [[OR]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> <i1 false, i1 true, i1 false>, <3 x i129> %b, <3 x i129> %a
|
||||
; CHECK-NEXT: ret <3 x i129> [[TMP1]]
|
||||
;
|
||||
%and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
|
||||
%and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
|
||||
|
@ -416,13 +411,9 @@ define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
|
|||
|
||||
define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
|
||||
; CHECK-LABEL: @vec_sel_xor(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> %c to <4 x i32>
|
||||
; CHECK-NEXT: [[MASK_FLIP1:%.*]] = xor <4 x i32> [[MASK]], <i32 -1, i32 0, i32 0, i32 0>
|
||||
; CHECK-NEXT: [[NOT_MASK_FLIP1:%.*]] = xor <4 x i32> [[MASK]], <i32 0, i32 -1, i32 -1, i32 -1>
|
||||
; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[NOT_MASK_FLIP1]], %a
|
||||
; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> [[MASK_FLIP1]], %b
|
||||
; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[OR]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
;
|
||||
%mask = sext <4 x i1> %c to <4 x i32>
|
||||
%mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
|
||||
|
@ -433,3 +424,25 @@ define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
|
|||
ret <4 x i32> %or
|
||||
}
|
||||
|
||||
; Allow the transform even if the mask values have multiple uses because
|
||||
; there's still a net reduction of instructions from removing the and/and/or.
|
||||
|
||||
define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
|
||||
; CHECK-LABEL: @vec_sel_xor_multi_use(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> %c to <4 x i32>
|
||||
; CHECK-NEXT: [[MASK_FLIP1:%.*]] = xor <4 x i32> [[MASK]], <i32 -1, i32 0, i32 0, i32 0>
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[TMP2]], [[MASK_FLIP1]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[ADD]]
|
||||
;
|
||||
%mask = sext <4 x i1> %c to <4 x i32>
|
||||
; The two xor constants below are lane-wise inverses (-1 vs. 0 in every lane).
%mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
|
||||
%not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
|
||||
; The and/and/or below is the pattern expected to become the select checked above.
%and1 = and <4 x i32> %not_mask_flip1, %a
|
||||
%and2 = and <4 x i32> %mask_flip1, %b
|
||||
%or = or <4 x i32> %and1, %and2
|
||||
; This add is a second use of %mask_flip1, so its xor is expected to remain in the output.
%add = add <4 x i32> %or, %mask_flip1
|
||||
ret <4 x i32> %add
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue