From 2de209313e88467ec0103b1c69d37f80eef03107 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 28 Nov 2018 22:51:04 +0000 Subject: [PATCH] [x86] try select simplification for target-specific nodes This failed to select (which might be a separate bug) in X86ISelDAGToDAG because we try to create a select node that can be simplified away after rL347227. This change avoids the problem by simplifying the SHRUNKBLEND node sooner. In the test case, we manage to realize that the true/false values of the select (SHRUNKBLEND) are the same thing, so it simplifies away completely. llvm-svn: 347818 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 +++- llvm/test/CodeGen/X86/vselect.ll | 51 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8e487a08376e..37748e1c4004 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33674,9 +33674,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc DL(N); SDValue Cond = N->getOperand(0); - // Get the LHS/RHS of the select. SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); + + // Try simplification again because we use this function to optimize + // SHRUNKBLEND nodes that are not handled by the generic combiner. 
+ if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS)) + return V; + EVT VT = LHS.getValueType(); EVT CondVT = Cond.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll index 2d08e21d941b..5c19a01db81d 100644 --- a/llvm/test/CodeGen/X86/vselect.ll +++ b/llvm/test/CodeGen/X86/vselect.ll @@ -559,3 +559,54 @@ define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i ret <2 x i64> %z } +; This turns into a SHRUNKBLEND with SSE4 or later, and via +; late shuffle magic, both sides of the blend are the same +; value. If that is not simplified before isel, it can fail +; to match (crash). + +define <2 x i32> @simplify_select(i32 %x, <2 x i1> %z) { +; SSE2-LABEL: simplify_select: +; SSE2: # %bb.0: +; SSE2-NEXT: # kill: def $edi killed $edi def $rdi +; SSE2-NEXT: psllq $63, %xmm0 +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: movq %rdi, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1] +; SSE2-NEXT: movdqa %xmm2, %xmm3 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0] +; SSE2-NEXT: pand %xmm0, %xmm2 +; SSE2-NEXT: pandn %xmm3, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: simplify_select: +; SSE41: # %bb.0: +; SSE41-NEXT: # kill: def $edi killed $edi def $rdi +; SSE41-NEXT: movq %rdi, %xmm0 +; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; SSE41-NEXT: retq +; +; AVX1-LABEL: simplify_select: +; AVX1: # %bb.0: +; AVX1-NEXT: # kill: def $edi killed $edi def $rdi +; AVX1-NEXT: vmovq %rdi, %xmm0 +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] +; AVX1-NEXT: retq +; +; AVX2-LABEL: simplify_select: +; AVX2: # %bb.0: +; AVX2-NEXT: # kill: def $edi killed $edi def $rdi +; AVX2-NEXT: vmovq %rdi, %xmm0 +; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX2-NEXT: retq + %a = insertelement <2 x i32> <i32 0, i32 undef>, i32 %x, i32 1 + %b = insertelement <2 x i32> <i32 undef, i32 0>, i32 %x, i32 0 + %y = or <2 x 
i32> %a, %b + %p16 = extractelement <2 x i32> %y, i32 1 + %p17 = insertelement <2 x i32> undef, i32 %p16, i32 0 + %p18 = insertelement <2 x i32> %p17, i32 %x, i32 1 + %r = select <2 x i1> %z, <2 x i32> %y, <2 x i32> %p18 + ret <2 x i32> %r +} +