[InstCombine] eliminate commuted select-shuffles + binop (PR41304)

If we have a commutable vector binop with inverted select-shuffles,
we don't care about the order of the operands in each vector lane:

LHS = shuffle V1, V2, <0, 5, 6, 3>
RHS = shuffle V2, V1, <0, 5, 6, 3>
LHS + RHS --> <V1[0]+V2[0], V2[1]+V1[1], V2[2]+V1[2], V1[3]+V2[3]> --> V1 + V2

PR41304:
https://bugs.llvm.org/show_bug.cgi?id=41304
...is currently titled as an SLP enhancement, but at least for the
given example, we can reduce that in instcombine because we are just
eliminating shuffles.

As noted in the TODO, this could be generalized, but I haven't thought
through those patterns completely, so this is limited to what appears
to be always safe.

Differential Revision: https://reviews.llvm.org/D60048

llvm-svn: 357382
This commit is contained in:
Sanjay Patel 2019-04-01 13:36:40 +00:00
parent d9f6ee1c3c
commit 97d1bc4454
2 changed files with 76 additions and 21 deletions

View File

@ -1415,6 +1415,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
return createBinOpShuffle(V1, V2, Mask);
}
// If both arguments of a commutative binop are select-shuffles that use the
// same mask with commuted operands, the shuffles are unnecessary.
if (Inst.isCommutative() &&
match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
m_Specific(Mask)))) {
auto *LShuf = cast<ShuffleVectorInst>(LHS);
auto *RShuf = cast<ShuffleVectorInst>(RHS);
// TODO: Allow shuffles that contain undefs in the mask?
// That is legal, but it reduces undef knowledge.
// TODO: Allow arbitrary shuffles by shuffling after binop?
// That might be legal, but we have to deal with poison.
if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
// Example:
// LHS = shuffle V1, V2, <0, 5, 6, 3>
// RHS = shuffle V2, V1, <0, 5, 6, 3>
// LHS + RHS --> (V1[0]+V2[0], V2[1]+V1[1], V2[2]+V1[2], V1[3]+V2[3])
//           --> V1 + V2
Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
NewBO->copyIRFlags(&Inst);
return NewBO;
}
}
// If one argument is a shuffle within one vector and the other is a constant,
// try moving the shuffle after the binary operation. This canonicalization
// intends to move shuffles closer to other shuffles and binops closer to

View File

@ -5,9 +5,7 @@
define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @and(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@ -18,9 +16,7 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @or(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@ -33,9 +29,7 @@ define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @xor(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@ -48,9 +42,7 @@ define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @add(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@ -59,6 +51,51 @@ define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) {
ret <4 x i32> %r
}
; Negative test - wrong operand.
; The second shuffle selects between %y and %z rather than %y and %x, so the
; two shuffles are not commuted forms of the same operand pair. The CHECK
; lines expect both shuffles and the add to remain.
define <4 x i32> @add_wrong_op(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: @add_wrong_op(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%sel2 = shufflevector <4 x i32> %y, <4 x i32> %z, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%r = add nsw <4 x i32> %sel1, %sel2
ret <4 x i32> %r
}
; Negative test - wrong mask (but we could handle this...)
; Lane 0 of the mask reads element 1, so the shuffles are not lane-preserving
; selects between %x and %y. The CHECK lines expect both shuffles and the add
; to remain.
define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @add_non_select_mask(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
%sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
%r = add nsw <4 x i32> %sel1, %sel2
ret <4 x i32> %r
}
; Negative test - mask with an undef element (but we could handle this...)
; Lane 0 of both shuffle masks is undef. The CHECK lines expect both shuffles
; and the add to remain.
define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @add_masks_with_undefs(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
%sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
%r = add nsw <4 x i32> %sel1, %sel2
ret <4 x i32> %r
}
; Non-commutative opcode
define <4 x i32> @sub(<4 x i32> %x, <4 x i32> %y) {
@ -76,9 +113,7 @@ define <4 x i32> @sub(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @mul(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@ -180,9 +215,7 @@ define <4 x i32> @ashr(<4 x i32> %x, <4 x i32> %y) {
define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret <4 x float> [[R]]
;
%sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@ -206,9 +239,7 @@ define <4 x float> @fsub(<4 x float> %x, <4 x float> %y) {
define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[SEL1]], [[SEL2]]
; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret <4 x double> [[R]]
;
%sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>