From ca7fdd41bda02a24c401ecf75d306cea93c27fb5 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 3 Jan 2020 14:57:35 -0500
Subject: [PATCH] [DAGCombiner] fix miscompile in translating (X & undef) to
 shuffle

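An undef element in the constant operand of the 'and' was translated to
an undef shuffle index (-1). But X & undef must fold to 0 (not undef),
so that lane now selects from the zero vector, the same as it would for
an element with all 0-bits.

See PR42982 for more context:
https://bugs.llvm.org/show_bug.cgi?id=42982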
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +++-
 llvm/test/CodeGen/X86/combine-and.ll          | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3bfea38932f3..d401e7fb657b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19769,8 +19769,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
       int EltIdx = i / Split;
       int SubIdx = i % Split;
       SDValue Elt = RHS.getOperand(EltIdx);
+      // X & undef --> 0 (not undef). So this lane must be converted to choose
+      // from the zero constant vector (same as if the element had all 0-bits).
       if (Elt.isUndef()) {
-        Indices.push_back(-1);
+        Indices.push_back(i + NumSubElts);
         continue;
       }
 
diff --git a/llvm/test/CodeGen/X86/combine-and.ll b/llvm/test/CodeGen/X86/combine-and.ll
index c3a394873759..462374c4862d 100644
--- a/llvm/test/CodeGen/X86/combine-and.ll
+++ b/llvm/test/CodeGen/X86/combine-and.ll
@@ -163,13 +163,13 @@ define <4 x i32> @test14(<4 x i32> %A) {
   ret <4 x i32> %1
 }
 
-; FIXME: X & undef must fold to 0. So lane 0 must choose from the zero vector.
+; X & undef must fold to 0. So lane 0 must choose from the zero vector.
 
 define <4 x i32> @undef_lane(<4 x i32> %x) {
 ; CHECK-LABEL: undef_lane:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
 ; CHECK-NEXT:    retq
   %r = and <4 x i32> %x, <i32 undef, i32 4294967295, i32 0, i32 4294967295>
   ret <4 x i32> %r