From 2eb3512090b8a3be45d88b53b2416e6e16455efb Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 16 May 2018 15:15:22 +0000
Subject: [PATCH] [InstCombine] allow more binop (shuffle X), C transforms

The canonicalization was restricted to shuffle masks with
a 1-to-1 mapping to the constant vector, but that disqualifies
the common splat pattern. This is part of solving PR37463:
https://bugs.llvm.org/show_bug.cgi?id=37463

llvm-svn: 332479
---
 .../InstCombine/InstructionCombining.cpp      |  6 +++--
 .../Transforms/InstCombine/vec_shuffle.ll     | 25 ++++++++++++++-----
 2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5dc636326918..6c48db5d225d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1401,7 +1401,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
     // Find constant NewC that has property:
     //   shuffle(NewC, ShMask) = C
     // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
-    // reorder is not possible.
+    // reorder is not possible. A 1-to-1 mapping is not required. Example:
+    // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
     SmallVector<int, 16> ShMask;
     ShuffleVectorInst::getShuffleMask(Mask, ShMask);
     SmallVector<Constant *, 16>
@@ -1411,7 +1412,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
       if (ShMask[I] >= 0) {
         assert(ShMask[I] < (int)VWidth);
         Constant *CElt = C->getAggregateElement(I);
-        if (!CElt || !isa<UndefValue>(NewVecC[ShMask[I]])) {
+        Constant *NewCElt = NewVecC[ShMask[I]];
+        if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt)) {
           MayChange = false;
           break;
         }
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 171907e28c7c..6824c3c1266e 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -439,9 +439,9 @@ define <2 x float> @fadd_const_multiuse(<2 x float> %v) {
 
 define <4 x i32> @mul_const_splat(<4 x i32> %v) {
 ; CHECK-LABEL: @mul_const_splat(
-; CHECK-NEXT:    [[T1:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[T1]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT:    ret <4 x i32> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 42, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %r = mul <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %t1
@@ -452,15 +452,28 @@ define <4 x i32> @mul_const_splat(<4 x i32> %v) {
 
 define <4 x i32> @lshr_const_half_splat(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_const_half_splat(
-; CHECK-NEXT:    [[T1:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
-; CHECK-NEXT:    [[R:%.*]] = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 9>, [[T1]]
-; CHECK-NEXT:    ret <4 x i32> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> <i32 undef, i32 8, i32 9, i32 undef>, [[V:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %t1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
   %r = lshr <4 x i32> <i32 8, i32 8, i32 9, i32 9>, %t1
   ret <4 x i32> %r
 }
 
+; We can't change this because there's no pre-shuffle version of the fmul constant.
+
+define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) {
+; CHECK-LABEL: @fmul_const_invalid_constant(
+; CHECK-NEXT:    [[T1:%.*]] = shufflevector <2 x float> [[V:%.*]], <2 x float> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = fmul <2 x float> [[T1]], <float 4.100000e+01, float 4.200000e+01>
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %t1 = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 0, i32 0>
+  %r = fmul <2 x float> %t1, <float 41.0, float 42.0>
+  ret <2 x float> %r
+}
+
 define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
 ; CHECK-LABEL: @shuffle_17add2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>