[DAG] SelectionDAG::isSplatValue - add initial BITCAST handling
This patch adds support for recognising vector splats by peeking through bitcasts to vectors with smaller element types - if all the offset subelements are splats then the bitcasted vector is a splat as well.

We don't have great coverage for isSplatValue so I've made this pretty specific to the use case I'm trying to fix - regressions in some vXi64 vector shift by splat cases that 32-bit x86 doesn't recognise because the shift amount buildvector has been type legalised to v2Xi32.

We can add further support (floats, bitcast from larger element types, undef elements) when we have actual test coverage.

Differential Revision: https://reviews.llvm.org/D120553
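As a rough illustration of the pattern this targets (modelled on the shiftInput___2x32bitcast test updated below; the function name and exact vector widths here are hypothetical), the shift amount only becomes recognisable as a splat once we look through the bitcast from the legalised v8i32 value:

define <4 x i64> @splat_shift_via_bitcast(<4 x i64> %input, i32 %shiftval) nounwind {
  ; The 32-bit amount lands in i32 lanes 0, 2, 4 and 6; lanes 1, 3, 5 and 7 stay zero.
  %vec = insertelement <8 x i32> zeroinitializer, i32 %shiftval, i32 0
  %smear = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ; Reinterpreted as v4i64 every lane holds the same value, and isSplatValue can
  ; now prove it: each offset i32 sub-element (lanes 0/2/4/6 and lanes 1/3/5/7)
  ; is itself a splat.
  %amt = bitcast <8 x i32> %smear to <4 x i64>
  %shifted = lshr <4 x i64> %input, %amt
  ret <4 x i64> %shifted
}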
commit df0a2b4f30 (parent 0817ce86b5)
@@ -2638,6 +2638,39 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
    }
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = V.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
    unsigned BitWidth = VT.getScalarSizeInBits();

    // Ignore bitcasts from unsupported types.
    // TODO: Add fp support?
    if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger())
      break;

    // Bitcast 'small element' vector to 'large element' vector.
    if ((BitWidth % SrcBitWidth) == 0) {
      // See if each sub element is a splat.
      unsigned Scale = BitWidth / SrcBitWidth;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt ScaledDemandedElts =
          APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
      for (unsigned I = 0; I != Scale; ++I) {
        APInt SubUndefElts;
        APInt SubDemandedElt = APInt::getOneBitSet(Scale, I);
        APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt);
        SubDemandedElts &= ScaledDemandedElts;
        if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
          return false;
        // TODO: Add support for merging sub undef elements.
        if (SubDemandedElts.isSubsetOf(SubUndefElts))
          return false;
      }
      return true;
    }
    break;
  }
  }

  return false;
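To make the new loop concrete: for a v2i64 value bitcast from v4i32 with both i64 elements demanded, Scale = 64 / 32 = 2, NumSrcElts = 4 and ScaledDemandedElts = 0b1111. Iteration I = 0 asks whether source elements {0, 2} (SubDemandedElts = 0b0101) are a splat, and I = 1 asks the same for elements {1, 3} (0b1010); only if both recursive isSplatValue queries succeed is the bitcast reported as a splat.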
@@ -62,11 +62,11 @@ allocas:
define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval) nounwind {
; X86-LABEL: shiftInput___64in32bitmode:
; X86: # %bb.0: # %allocas
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___64in32bitmode:
@@ -87,26 +87,20 @@ allocas:
define <4 x i64> @shiftInput___2x32bitcast(<4 x i64> %input, i32 %shiftval) nounwind {
; X86-LABEL: shiftInput___2x32bitcast:
; X86: # %bb.0: # %allocas
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-NEXT: vpsrlq %xmm2, %xmm1, %xmm1
; X86-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: shiftInput___2x32bitcast:
; X64: # %bb.0: # %allocas
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: vmovd %edi, %xmm2
; X64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],zero,xmm2[0],zero
; X64-NEXT: vpsrlq %xmm2, %xmm1, %xmm3
; X64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,1]
; X64-NEXT: vpsrlq %xmm4, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT: vpsrlq %xmm2, %xmm0, %xmm2
; X64-NEXT: vpsrlq %xmm4, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vmovd %edi, %xmm1
; X64-NEXT: vextractf128 $1, %ymm0, %xmm2
; X64-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT: retq
allocas:
%smear.0 = insertelement <8 x i32> zeroinitializer, i32 %shiftval, i32 0
@@ -1036,23 +1036,14 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pandn %xmm4, %xmm5
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm3, %xmm4
; X86-SSE2-NEXT: psrlq $1, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: psrlq %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; X86-SSE2-NEXT: pand %xmm4, %xmm3
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm3, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: psrlq %xmm4, %xmm1
; X86-SSE2-NEXT: pand %xmm3, %xmm2
; X86-SSE2-NEXT: psllq %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %splat)
@@ -802,24 +802,15 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [63,0,63,0]
; X86-SSE2-NEXT: pxor %xmm3, %xmm3
; X86-SSE2-NEXT: psubq %xmm1, %xmm3
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
; X86-SSE2-NEXT: psllq %xmm1, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm5
; X86-SSE2-NEXT: psllq %xmm1, %xmm5
; X86-SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X86-SSE2-NEXT: pand %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrlq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm2, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm5, %xmm0
; X86-SSE2-NEXT: psrlq %xmm3, %xmm0
; X86-SSE2-NEXT: por %xmm4, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %splat)
@@ -1126,23 +1126,14 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
; X86-SSE2-NEXT: pand %xmm4, %xmm5
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: psrlq %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm5, %xmm1
; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; X86-SSE2-NEXT: pandn %xmm4, %xmm3
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0]
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pand %xmm3, %xmm4
; X86-SSE2-NEXT: psrlq %xmm4, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm2
; X86-SSE2-NEXT: psllq $1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm3, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: psllq %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %splat)
@@ -828,24 +828,15 @@ define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [63,0,63,0]
; X86-SSE2-NEXT: pxor %xmm3, %xmm3
; X86-SSE2-NEXT: psubq %xmm1, %xmm3
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
; X86-SSE2-NEXT: psrlq %xmm1, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm5
; X86-SSE2-NEXT: psrlq %xmm1, %xmm5
; X86-SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
; X86-SSE2-NEXT: pand %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psllq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: psllq %xmm2, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm5, %xmm0
; X86-SSE2-NEXT: psllq %xmm3, %xmm0
; X86-SSE2-NEXT: por %xmm4, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
%res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %splat)
@@ -788,17 +788,12 @@ define <2 x i64> @splatvar_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
;
; X86-SSE2-LABEL: splatvar_rotate_v2i64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [64,0,64,0]
; X86-SSE2-NEXT: psubq %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psllq %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrlq %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
; X86-SSE2-NEXT: psrlq %xmm3, %xmm0
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-SSE2-NEXT: orpd %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [64,0,0,0]
; X86-SSE2-NEXT: psubq %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: psllq %xmm1, %xmm3
; X86-SSE2-NEXT: psrlq %xmm2, %xmm0
; X86-SSE2-NEXT: por %xmm3, %xmm0
; X86-SSE2-NEXT: retl
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
%splat64 = sub <2 x i64> <i64 64, i64 64>, %splat