[X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)

As discussed on PR42825, if we are inverting the selection mask, we can just swap the inputs and avoid the inversion.

Differential Revision: https://reviews.llvm.org/D65522

llvm-svn: 368438
commit 60394f47b0
parent 991834a516
Author: Simon Pilgrim
Date:   2019-08-09 12:44:20 +00:00

3 changed files with 19 additions and 21 deletions

@@ -36615,6 +36615,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
     return V;
 
+  // select(~Cond, X, Y) -> select(Cond, Y, X)
+  if (CondVT.getScalarType() != MVT::i1)
+    if (SDValue CondNot = IsNOT(Cond, DAG))
+      return DAG.getNode(N->getOpcode(), DL, VT,
+                         DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+
   // Custom action for SELECT MMX
   if (VT == MVT::x86mmx) {
     LHS = DAG.getBitcast(MVT::i64, LHS);
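
For reference, the identity the combine relies on is easy to check in isolation: BLENDV selects purely on the sign bit of each mask element, so a blend with an inverted mask is the same as a blend of the swapped inputs with the original mask. A minimal standalone sketch using SSE4.1 intrinsics (not part of this commit; the values are illustrative):

#include <immintrin.h>
#include <cstdio>
#include <cstring>

int main() {
  __m128i x    = _mm_set1_epi8(0x11);
  __m128i y    = _mm_set1_epi8(0x22);
  __m128i m    = _mm_setr_epi8(-1, 0, -1, 0, -1, 0, -1, 0,
                               0, -1, 0, -1, 0, -1, 0, -1);
  __m128i notm = _mm_xor_si128(m, _mm_set1_epi8(-1)); // ~m

  // pblendvb takes the second source where the mask sign bit is set,
  // so blendv(x, y, ~m) == blendv(y, x, m).
  __m128i lhs = _mm_blendv_epi8(x, y, notm); // inverted mask
  __m128i rhs = _mm_blendv_epi8(y, x, m);    // swapped inputs
  puts(memcmp(&lhs, &rhs, sizeof(lhs)) == 0 ? "equal" : "differ");
  return 0;
}

Built with -msse4.1 this prints "equal"; the same argument applies per element to blendvps/blendvpd and their 256-bit forms.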

@@ -157,10 +157,9 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
 ; CHECK-LABEL: xor_pblendvb:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movdqa %xmm3, %xmm0
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1)
@@ -170,11 +169,10 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
 define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 ; CHECK-LABEL: xor_blendvps:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    blendvps %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movaps %xmm3, %xmm0
+; CHECK-NEXT:    movaps %xmm0, %xmm3
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    blendvps %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = bitcast <4 x float> %a2 to <4 x i32>
   %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -186,11 +184,10 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 ; CHECK-LABEL: xor_blendvpd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movapd %xmm3, %xmm0
+; CHECK-NEXT:    movapd %xmm0, %xmm3
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movapd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = bitcast <2 x double> %a2 to <4 x i32>
   %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
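
The updated checks show the payoff: the pcmpeqd all-ones idiom and the pxor that inverted the mask are gone, leaving only register copies around a single blend. For context, a hedged sketch of the source-level pattern these tests model (the function name is illustrative, not from the test suite):

#include <immintrin.h>

// Returns b where the sign bit of mask is clear and a where it is set,
// written with an explicit inversion as in the xor_blendvps test above.
// With this combine the backend can emit blendvps on the original mask
// with swapped operands instead of materializing all-ones and xor'ing.
__m128 select_on_inverted_mask(__m128 a, __m128 b, __m128 mask) {
  const __m128 ones = _mm_castsi128_ps(_mm_set1_epi32(-1));
  return _mm_blendv_ps(a, b, _mm_xor_ps(mask, ones));
}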

@@ -1852,25 +1852,20 @@ define <16 x i32> @test_masked_v16i32(i8 * %addr, <16 x i32> %old, <16 x i32> %m
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
 ; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm4
 ; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vblendvps %ymm3, %ymm4, %ymm1, %ymm1
+; AVX1-NEXT:    vblendvps %ymm3, %ymm1, %ymm4, %ymm1
 ; AVX1-NEXT:    vmovntdqa (%rdi), %xmm3
 ; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm4
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm3, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_masked_v16i32: