From 065431c82bfc697c588d70aad0c38262e98fc4ef Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 May 2019 17:56:25 +0000 Subject: [PATCH] [X86][SSE] Fold movmsk(not(x)) -> not(movmsk) Helps to improve folding of comparisons with movmsk results. llvm-svn: 361056 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 15 ++++++++++++++- llvm/test/CodeGen/X86/movmsk-cmp.ll | 8 +++----- llvm/test/CodeGen/X86/vector-compare-all_of.ll | 8 +++----- llvm/test/CodeGen/X86/vector-compare-any_of.ll | 10 +++------- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 743e23977e50..4787741cb9b6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41498,6 +41498,8 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(0); MVT SrcVT = Src.getSimpleValueType(); MVT VT = N->getSimpleValueType(0); + unsigned NumBits = VT.getScalarSizeInBits(); + unsigned NumElts = SrcVT.getVectorNumElements(); // Perform constant folding. if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) { @@ -41517,9 +41519,20 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, Src.getOperand(0).getScalarValueSizeInBits() == EltWidth) return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); + // Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk results + // with scalar comparisons. + if (SDValue NotSrc = IsNOT(Src, DAG)) { + SDLoc DL(N); + APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts); + NotSrc = DAG.getBitcast(SrcVT, NotSrc); + return DAG.getNode(ISD::XOR, DL, VT, + DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc), + DAG.getConstant(NotMask, DL, VT)); + } + // Simplify the inputs. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits())); + APInt DemandedMask(APInt::getAllOnesValue(NumBits)); if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) return SDValue(N, 0); diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index eeb523c38007..6f62787c950a 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -4485,9 +4485,8 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] ; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %eax +; SSE2-NEXT: movmskpd %xmm1, %eax +; SSE2-NEXT: xorl $3, %eax ; SSE2-NEXT: cmpb $3, %al ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq @@ -4495,9 +4494,8 @@ define i1 @movmsk_v2i64(<2 x i64> %x, <2 x i64> %y) { ; AVX-LABEL: movmsk_v2i64: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskpd %xmm0, %eax +; AVX-NEXT: xorl $3, %eax ; AVX-NEXT: cmpb $3, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll index abd9495c4ab0..c5a86ca70b51 100644 --- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll @@ -1102,9 +1102,8 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: bool_reduction_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE-NEXT: pxor %xmm0, %xmm1 -; SSE-NEXT: movmskps %xmm1, %eax +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: xorl $15, %eax ; SSE-NEXT: cmpb $15, %al ; SSE-NEXT: sete %al ; SSE-NEXT: retq @@ -1112,9 +1111,8 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { 
; AVX-LABEL: bool_reduction_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax +; AVX-NEXT: xorl $15, %eax ; AVX-NEXT: cmpb $15, %al ; AVX-NEXT: sete %al ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index 051e3c0e7e74..a64d031fe755 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -1018,20 +1018,16 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE-LABEL: bool_reduction_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE-NEXT: pxor %xmm0, %xmm1 -; SSE-NEXT: movmskps %xmm1, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: xorb $15, %al ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; ; AVX-LABEL: bool_reduction_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovmskps %xmm0, %eax -; AVX-NEXT: testb %al, %al +; AVX-NEXT: xorb $15, %al ; AVX-NEXT: setne %al ; AVX-NEXT: retq ;