From 0ad17402a90819a8485abf938505ea44871f6459 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 15 Mar 2019 11:05:42 +0000
Subject: [PATCH] [X86][SSE] Attempt to convert SSE shift-by-var to
 shift-by-imm.

Prep work for PR40203

llvm-svn: 356249
---
 llvm/lib/Target/X86/X86ISelLowering.cpp    | 13 +++++-
 llvm/test/CodeGen/X86/vec_shift2.ll        |  8 +---
 llvm/test/CodeGen/X86/vector-idiv-v2i32.ll | 51 +++++++++-------------
 3 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47ebe254b99e..222a6e82729a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36527,11 +36527,22 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
           X86ISD::VSRL == N->getOpcode()) &&
          "Unexpected shift opcode");
   EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
 
   // Shift zero -> zero.
-  if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+  if (ISD::isBuildVectorAllZeros(N0.getNode()))
     return DAG.getConstant(0, SDLoc(N), VT);
 
+  // Detect constant shift amounts.
+  APInt UndefElts;
+  SmallVector<APInt, 32> EltBits;
+  if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) {
+    unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);
+    return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,
+                                      EltBits[0].getZExtValue(), DAG);
+  }
+
   APInt KnownUndef, KnownZero;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
diff --git a/llvm/test/CodeGen/X86/vec_shift2.ll b/llvm/test/CodeGen/X86/vec_shift2.ll
index 7a1ade72a385..a38187f190f9 100644
--- a/llvm/test/CodeGen/X86/vec_shift2.ll
+++ b/llvm/test/CodeGen/X86/vec_shift2.ll
@@ -5,16 +5,12 @@
 define <2 x i64> @t1(<2 x i64> %b1, <2 x i64> %c) nounwind {
 ; X32-LABEL: t1:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $14, %eax
-; X32-NEXT:    movd %eax, %xmm1
-; X32-NEXT:    psrlw %xmm1, %xmm0
+; X32-NEXT:    psrlw $14, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: t1:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl $14, %eax
-; X64-NEXT:    movd %eax, %xmm1
-; X64-NEXT:    psrlw %xmm1, %xmm0
+; X64-NEXT:    psrlw $14, %xmm0
 ; X64-NEXT:    retq
 	%tmp1 = bitcast <2 x i64> %b1 to <8 x i16>
 	%tmp2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w( <8 x i16> %tmp1, <8 x i16> bitcast (<4 x i32> < i32 14, i32 undef, i32 undef, i32 undef > to <8 x i16>) ) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
index 333f5518f370..c0825262e3a7 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
@@ -618,37 +618,28 @@ define void @test_sdiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-NEXT:    psrad $31, %xmm0
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
-; X86-NEXT:    movdqa %xmm1, %xmm0
-; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [31,0,31,0]
-; X86-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
-; X86-NEXT:    movdqa %xmm3, %xmm4
-; X86-NEXT:    psrlq %xmm2, %xmm4
-; X86-NEXT:    movl $31, %ecx
-; X86-NEXT:    movd %ecx, %xmm5
-; X86-NEXT:    psrlq %xmm5, %xmm3
-; X86-NEXT:    movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
-; X86-NEXT:    movdqa %xmm0, %xmm4
-; X86-NEXT:    psrlq %xmm2, %xmm4
-; X86-NEXT:    psrlq %xmm5, %xmm0
-; X86-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; X86-NEXT:    xorpd %xmm3, %xmm0
-; X86-NEXT:    psubq %xmm3, %xmm0
-; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X86-NEXT:    psrlq $29, %xmm0
-; X86-NEXT:    paddq %xmm1, %xmm0
-; X86-NEXT:    psllq $32, %xmm0
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
-; X86-NEXT:    psrad $31, %xmm0
-; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-NEXT:    psrlq $3, %xmm1
-; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; X86-NEXT:    psrad $31, %xmm1
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; X86-NEXT:    movdqa %xmm0, %xmm2
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X86-NEXT:    psrlq $31, %xmm2
+; X86-NEXT:    movsd {{.*#+}} xmm2 = xmm2[0,1]
+; X86-NEXT:    movapd {{.*#+}} xmm1 = [2.1219957909652723E-314,2.1219957909652723E-314]
+; X86-NEXT:    xorpd %xmm1, %xmm2
+; X86-NEXT:    psubq %xmm1, %xmm2
+; X86-NEXT:    pand {{\.LCPI.*}}, %xmm2
+; X86-NEXT:    psrlq $29, %xmm2
+; X86-NEXT:    paddq %xmm0, %xmm2
+; X86-NEXT:    psllq $32, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,3,2,3]
+; X86-NEXT:    psrad $31, %xmm2
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT:    psrlq $3, %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-NEXT:    movq %xmm0, (%eax)
 ; X86-NEXT:    retl
 ;
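
For illustration (not part of the commit itself): a minimal IR reproducer distilled
from the vec_shift2.ll test above, showing the fold this combine now performs. The
function name @shift_by_const_vec is hypothetical; per the intrinsic's semantics the
shift amount is taken from the low 64 bits of the second operand, so the constant
build_vector below shifts every i16 lane right by 14.

  ; Before: movl $14, %eax / movd %eax, %xmm1 / psrlw %xmm1, %xmm0
  ; After:  psrlw $14, %xmm0
  define <8 x i16> @shift_by_const_vec(<8 x i16> %v) nounwind {
    ; shift amount operand is a constant vector whose low 64 bits equal 14
    %r = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 14, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
    ret <8 x i16> %r
  }
  declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone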