From 85e613963372beb6f5af39592b16bcee748bb0c3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 16 Jan 2018 14:21:28 +0000
Subject: [PATCH] [X86][MMX] Improve MMX constant generation

Extend the MMX zero code to take any constant with zeroed upper 32 bits

llvm-svn: 322553
---
 llvm/lib/Target/X86/X86ISelLowering.cpp     | 15 ++++++++++++---
 llvm/test/CodeGen/X86/mmx-fold-load.ll      | 21 ++++++++-------------
 llvm/test/CodeGen/X86/vector-shuffle-mmx.ll | 14 ++++----------
 3 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 24e2459114e7..ec495063524e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5396,6 +5396,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
     SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
     return CastBitData(UndefSrcElts, SrcEltBits);
   }
+  if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
+    APInt UndefSrcElts = APInt::getNullValue(1);
+    APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
+    SmallVector<APInt, 64> SrcEltBits(1, RawBits);
+    return CastBitData(UndefSrcElts, SrcEltBits);
+  }
 
   // Extract constant bits from build vector.
   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
@@ -30407,11 +30413,14 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
   // it's better to handle them early to be sure we emit efficient code by
   // avoiding store-load conversions.
   if (VT == MVT::x86mmx) {
-    // Detect zero MMX vectors.
-    if (X86::isZeroNode(N0) || ISD::isBuildVectorAllZeros(N0.getNode())) {
+    // Detect zero-extended MMX constant vectors.
+    APInt UndefElts;
+    SmallVector<APInt, 2> EltBits;
+    if (getTargetConstantBitsFromNode(N0, 32, UndefElts, EltBits) &&
+        EltBits[1] == 0) {
       SDLoc DL(N0);
       return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
-                         DAG.getConstant(0, DL, MVT::i32));
+                         DAG.getConstant(EltBits[0], DL, MVT::i32));
     }
 
     // Detect bitcasts between i32 to x86mmx low word.
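For context, a minimal C++ sketch (not part of this patch; the function name is illustrative and it assumes MMX intrinsics are available via -mmmx) of the kind of source the extended combine now handles: a 64-bit MMX constant whose upper 32 bits are zero, which can be materialized with a single movd instead of a stack store and movq reload.

// Illustrative only: mirrors the psrlq-by-register pattern exercised in
// mmx-fold-load.ll below. The constant {255, 0} has zeroed upper 32 bits.
#include <mmintrin.h>

__m64 shift_small_mmx_constant(__m64 amount) {
  __m64 value = _mm_cvtsi32_si64(255); // now lowered to a single movd
  return _mm_srl_si64(value, amount);  // psrlq with a register shift amount
}

Previously only the all-zeros constant was recognized (via X86::isZeroNode / isBuildVectorAllZeros); routing the check through getTargetConstantBitsFromNode lets any constant with a zero upper 32-bit element be built with MMX_MOVW2D.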
diff --git a/llvm/test/CodeGen/X86/mmx-fold-load.ll b/llvm/test/CodeGen/X86/mmx-fold-load.ll
index 860487d9f082..71b8b40fc5d7 100644
--- a/llvm/test/CodeGen/X86/mmx-fold-load.ll
+++ b/llvm/test/CodeGen/X86/mmx-fold-load.ll
@@ -582,20 +582,15 @@ declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
 define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
 ; X86-LABEL: test_psrlq_by_volatile_shift_amount:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebp
-; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    movl 8(%ebp), %eax
-; X86-NEXT:    movl $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
-; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl $255, {{[0-9]+}}(%esp)
-; X86-NEXT:    movq {{[0-9]+}}(%esp), %mm1
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl $1, (%esp)
+; X86-NEXT:    movd (%esp), %mm0
+; X86-NEXT:    movl $255, %ecx
+; X86-NEXT:    movd %ecx, %mm1
 ; X86-NEXT:    psrlq %mm0, %mm1
 ; X86-NEXT:    movq %mm1, (%eax)
-; X86-NEXT:    movl %ebp, %esp
-; X86-NEXT:    popl %ebp
+; X86-NEXT:    popl %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_psrlq_by_volatile_shift_amount:
@@ -603,7 +598,7 @@ define void @test_psrlq_by_volatile_shift_amount(x86_mmx* %t) nounwind {
 ; X64-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
 ; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm0
 ; X64-NEXT:    movl $255, %eax
-; X64-NEXT:    movq %rax, %mm1
+; X64-NEXT:    movd %eax, %mm1
 ; X64-NEXT:    psrlq %mm0, %mm1
 ; X64-NEXT:    movq %mm1, (%rdi)
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
index 2361cb00f960..a2f6ecc25ccf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -95,36 +95,30 @@ entry:
 define <4 x float> @pr35869() nounwind {
 ; X32-LABEL: pr35869:
 ; X32:       ## %bb.0:
-; X32-NEXT:    subl $12, %esp
 ; X32-NEXT:    movl $64, %eax
-; X32-NEXT:    movd %eax, %xmm0
-; X32-NEXT:    movq %xmm0, (%esp)
-; X32-NEXT:    movq (%esp), %mm0
+; X32-NEXT:    movd %eax, %mm0
 ; X32-NEXT:    pxor %mm1, %mm1
 ; X32-NEXT:    punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
-; X32-NEXT:    pxor %xmm0, %xmm0
 ; X32-NEXT:    pcmpgtw %mm0, %mm1
 ; X32-NEXT:    movq %mm0, %mm2
 ; X32-NEXT:    punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
+; X32-NEXT:    xorps %xmm0, %xmm0
 ; X32-NEXT:    cvtpi2ps %mm2, %xmm0
 ; X32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
 ; X32-NEXT:    punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
 ; X32-NEXT:    cvtpi2ps %mm0, %xmm0
-; X32-NEXT:    addl $12, %esp
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: pr35869:
 ; X64:       ## %bb.0:
 ; X64-NEXT:    movl $64, %eax
-; X64-NEXT:    movd %eax, %xmm0
-; X64-NEXT:    movq %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm0
+; X64-NEXT:    movd %eax, %mm0
 ; X64-NEXT:    pxor %mm1, %mm1
 ; X64-NEXT:    punpcklbw %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
-; X64-NEXT:    pxor %xmm0, %xmm0
 ; X64-NEXT:    pcmpgtw %mm0, %mm1
 ; X64-NEXT:    movq %mm0, %mm2
 ; X64-NEXT:    punpckhwd %mm1, %mm2 ## mm2 = mm2[2],mm1[2],mm2[3],mm1[3]
+; X64-NEXT:    xorps %xmm0, %xmm0
 ; X64-NEXT:    cvtpi2ps %mm2, %xmm0
 ; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
 ; X64-NEXT:    punpcklwd %mm1, %mm0 ## mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
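The vector-shuffle-mmx.ll change corresponds to the pr35869 test above. As a hedged reconstruction (not the original reproducer; intrinsics chosen to resemble the checked assembly, built with -mmmx -msse), the affected pattern is roughly:

// Illustrative only: widen a small MMX constant to <4 x float> via
// punpck + cvtpi2ps, as in the pr35869 test. With this patch the constant
// reaches %mm0 with a single movd instead of an XMM/stack round trip.
#include <mmintrin.h>
#include <xmmintrin.h>

__m128 widen_mmx_constant_to_ps() {
  __m64 zero  = _mm_setzero_si64();
  __m64 bytes = _mm_cvtsi32_si64(64);                   // movd $64 -> %mm0
  __m64 lo16  = _mm_unpacklo_pi8(bytes, zero);          // punpcklbw
  __m64 sign  = _mm_cmpgt_pi16(zero, lo16);             // pcmpgtw
  __m64 hi32  = _mm_unpackhi_pi16(lo16, sign);          // punpckhwd
  __m64 lo32  = _mm_unpacklo_pi16(lo16, sign);          // punpcklwd
  __m128 hi   = _mm_cvtpi32_ps(_mm_setzero_ps(), hi32); // cvtpi2ps
  __m128 both = _mm_movelh_ps(hi, hi);                  // movlhps
  // Real code would call _mm_empty() before any following x87 FP use.
  return _mm_cvtpi32_ps(both, lo32);                    // cvtpi2ps
}

The benefit is visible in the removed check lines: the constant no longer bounces through an XMM register and a stack slot before landing in an MMX register.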