[InstCombine] eliminate commuted select-shuffles + binop (PR41304)

If we have a commutable vector binop with inverted select-shuffles, we don't care about the order of the operands in each vector lane: LHS = shuffle V1, V2, <0, 5, 6, 3> RHS = shuffle V2, V1, <0, 5, 6, 3> LHS + RHS --> <V1[0]+V2[0], V2[1]+V1[1], V2[2]+V1[2], V1[3]+V2[3]> --> V1 + V2 PR41304: https://bugs.llvm.org/show_bug.cgi?id=41304 ...is currently titled as an SLP enhancement, but at least for the given example, we can reduce that in instcombine because we are just eliminating shuffles. As noted in the TODO, this could be generalized, but I haven't thought through those patterns completely, so this is limited to what appears to be always safe. Differential Revision: https://reviews.llvm.org/D60048 llvm-svn: 357382
2019-04-01 13:36:40 +00:00 · 2019-04-01 13:36:40 +00:00 · 97d1bc4454
parent d9f6ee1c3c
commit 97d1bc4454
2 changed files with 76 additions and 21 deletions
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@ -1415,6 +1415,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
    return createBinOpShuffle(V1, V2, Mask);
  }

+  // If both arguments of a commutative binop are select-shuffles that use the
+  // same mask with commuted operands, the shuffles are unnecessary.
+  if (Inst.isCommutative() &&
+      match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
+      match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
+                                 m_Specific(Mask)))) {
+    auto *LShuf = cast<ShuffleVectorInst>(LHS);
+    auto *RShuf = cast<ShuffleVectorInst>(RHS);
+    // TODO: Allow shuffles that contain undefs in the mask?
+    //       That is legal, but it reduces undef knowledge.
+    // TODO: Allow arbitrary shuffles by shuffling after binop?
+    //       That might be legal, but we have to deal with poison.
+    if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
+        RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+      // Example:
+      // LHS = shuffle V1, V2, <0, 5, 6, 3>
+      // RHS = shuffle V2, V1, <0, 5, 6, 3>
+      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
+      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
+      NewBO->copyIRFlags(&Inst);
+      return NewBO;
+    }
+  }
+
  // If one argument is a shuffle within one vector and the other is a constant,
  // try moving the shuffle after the binary operation. This canonicalization
  // intends to move shuffles closer to other shuffles and binops closer to
--- a/llvm/test/Transforms/InstCombine/vec-binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/vec-binop-select.ll
@ -5,9 +5,7 @@

 define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @and(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = and <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@ -18,9 +16,7 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @or(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@ -33,9 +29,7 @@ define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @xor(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = xor <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@ -48,9 +42,7 @@ define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @add(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@ -59,6 +51,51 @@ define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) {
  ret <4 x i32> %r
 }

+; Negative test - wrong operand
+
+define <4 x i32> @add_wrong_op(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: @add_wrong_op(
+; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %z, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %r = add nsw <4 x i32> %sel1, %sel2
+  ret <4 x i32> %r
+}
+
+; Negative test - wrong mask (but we could handle this...)
+
+define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add_non_select_mask(
+; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  %r = add nsw <4 x i32> %sel1, %sel2
+  ret <4 x i32> %r
+}
+
+; Negative test - wrong mask (but we could handle this...)
+
+define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add_masks_with_undefs(
+; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+  %r = add nsw <4 x i32> %sel1, %sel2
+  ret <4 x i32> %r
+}
+
 ; Non-commutative opcode

 define <4 x i32> @sub(<4 x i32> %x, <4 x i32> %y) {
@ -76,9 +113,7 @@ define <4 x i32> @sub(<4 x i32> %x, <4 x i32> %y) {

 define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @mul(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = mul nuw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@ -180,9 +215,7 @@ define <4 x i32> @ashr(<4 x i32> %x, <4 x i32> %y) {

 define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fadd(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
  %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@ -206,9 +239,7 @@ define <4 x float> @fsub(<4 x float> %x, <4 x float> %y) {

 define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) {
 ; CHECK-LABEL: @fmul(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = fmul nnan <4 x double> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x double> [[R]]
 ;
  %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>