forked from OSchip/llvm-project
[X86] LowerRotate - enable ROTL vXi16 rotate-by-splat-amount on pre-AVX targets
Enabling this on all targets still causes a number of regressions due to getSplatValue/getTargetVShiftNode, but these don't really affect pre-AVX targets.
This commit is contained in:
parent
be7dbd674c
commit
75d8507e45
|
@ -29947,8 +29947,9 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
|
|||
// Attempt to fold as unpack(x,x) << zext(splat(y)):
|
||||
// rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
|
||||
// rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
|
||||
// TODO: Handle vXi16 cases.
|
||||
if (EltSizeInBits == 8 || EltSizeInBits == 32) {
|
||||
// TODO: Handle vXi16 cases on all targets.
|
||||
if (EltSizeInBits == 8 || EltSizeInBits == 32 ||
|
||||
(IsROTL && EltSizeInBits == 16 && !Subtarget.hasAVX())) {
|
||||
if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) {
|
||||
unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI;
|
||||
SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
|
||||
|
|
|
@ -933,30 +933,30 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind
|
|||
define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
|
||||
; SSE2-LABEL: splatvar_funnnel_v8i16:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
|
||||
; SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE2-NEXT: psllw %xmm2, %xmm3
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
|
||||
; SSE2-NEXT: psubw %xmm1, %xmm2
|
||||
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE2-NEXT: psrlw %xmm2, %xmm0
|
||||
; SSE2-NEXT: por %xmm3, %xmm0
|
||||
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE2-NEXT: pslld %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: pslld %xmm1, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: splatvar_funnnel_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm3
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
|
||||
; SSE41-NEXT: psubw %xmm1, %xmm2
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE41-NEXT: pslld %xmm1, %xmm2
|
||||
; SSE41-NEXT: psrld $16, %xmm2
|
||||
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE41-NEXT: pslld %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrld $16, %xmm0
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatvar_funnnel_v8i16:
|
||||
|
@ -1049,17 +1049,17 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind
|
|||
;
|
||||
; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
|
||||
; X86-SSE2: # %bb.0:
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: psllw %xmm2, %xmm3
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
|
||||
; X86-SSE2-NEXT: psubw %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
|
||||
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-SSE2-NEXT: psrlw %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: por %xmm3, %xmm0
|
||||
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
|
||||
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-SSE2-NEXT: pslld %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: psrad $16, %xmm2
|
||||
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; X86-SSE2-NEXT: pslld %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: psrad $16, %xmm0
|
||||
; X86-SSE2-NEXT: packssdw %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: retl
|
||||
%splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %splat)
|
||||
|
|
|
@ -918,30 +918,30 @@ define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
|
|||
define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
||||
; SSE2-LABEL: splatvar_rotate_v8i16:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
|
||||
; SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE2-NEXT: psllw %xmm2, %xmm3
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
|
||||
; SSE2-NEXT: psubw %xmm1, %xmm2
|
||||
; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE2-NEXT: psrlw %xmm2, %xmm0
|
||||
; SSE2-NEXT: por %xmm3, %xmm0
|
||||
; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
|
||||
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; SSE2-NEXT: pslld %xmm1, %xmm2
|
||||
; SSE2-NEXT: psrad $16, %xmm2
|
||||
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE2-NEXT: pslld %xmm1, %xmm0
|
||||
; SSE2-NEXT: psrad $16, %xmm0
|
||||
; SSE2-NEXT: packssdw %xmm2, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: splatvar_rotate_v8i16:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE41-NEXT: psllw %xmm2, %xmm3
|
||||
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
|
||||
; SSE41-NEXT: psubw %xmm1, %xmm2
|
||||
; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
|
||||
; SSE41-NEXT: psrlw %xmm1, %xmm0
|
||||
; SSE41-NEXT: por %xmm3, %xmm0
|
||||
; SSE41-NEXT: movdqa %xmm0, %xmm2
|
||||
; SSE41-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; SSE41-NEXT: pslld %xmm1, %xmm2
|
||||
; SSE41-NEXT: psrld $16, %xmm2
|
||||
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; SSE41-NEXT: pslld %xmm1, %xmm0
|
||||
; SSE41-NEXT: psrld $16, %xmm0
|
||||
; SSE41-NEXT: packusdw %xmm2, %xmm0
|
||||
; SSE41-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatvar_rotate_v8i16:
|
||||
|
@ -1034,17 +1034,17 @@ define <8 x i16> @splatvar_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
|
|||
;
|
||||
; X86-SSE2-LABEL: splatvar_rotate_v8i16:
|
||||
; X86-SSE2: # %bb.0:
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
|
||||
; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
|
||||
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0]
|
||||
; X86-SSE2-NEXT: pand %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
|
||||
; X86-SSE2-NEXT: psllw %xmm2, %xmm3
|
||||
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
|
||||
; X86-SSE2-NEXT: psubw %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
|
||||
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-SSE2-NEXT: psrlw %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: por %xmm3, %xmm0
|
||||
; X86-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
|
||||
; X86-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-SSE2-NEXT: pslld %xmm1, %xmm2
|
||||
; X86-SSE2-NEXT: psrad $16, %xmm2
|
||||
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
|
||||
; X86-SSE2-NEXT: pslld %xmm1, %xmm0
|
||||
; X86-SSE2-NEXT: psrad $16, %xmm0
|
||||
; X86-SSE2-NEXT: packssdw %xmm2, %xmm0
|
||||
; X86-SSE2-NEXT: retl
|
||||
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%splat16 = sub <8 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %splat
|
||||
|
|
Loading…
Reference in New Issue