[X86][SSE] Add VZEXT_MOVL target shuffle decoding.

Add support for decoding VZEXT_MOVL target shuffle masks, allowing it to be used as a source in target shuffle combines.

llvm-svn: 258215
Simon Pilgrim 2016-01-19 23:04:56 +00:00
parent fe4a7b3ccd
commit 4b919b2ab3
3 changed files with 9 additions and 12 deletions
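
For context, X86ISD::VZEXT_MOVL is the DAG node behind the scalar-move idiom (movss/movsd/movq): it keeps element 0 of its source and zeroes every other lane. A minimal sketch of the decode the patch wires up, assuming the SM_SentinelZero convention used by the other X86ShuffleDecode helpers (illustrative, not verbatim from the tree):

    // A zero-move-low mask: lane 0 comes from the source, the remaining
    // lanes are known zero and use the zero sentinel.
    void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
      unsigned NumElts = VT.getVectorNumElements();
      ShuffleMask.push_back(0);                 // low element: src[0]
      for (unsigned i = 1; i != NumElts; ++i)
        ShuffleMask.push_back(SM_SentinelZero); // upper lanes: zero
    }

For v4f32 this yields {0, Z, Z, Z}, a mask the recursive shuffle combines can merge with adjacent target shuffle masks.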


@@ -3939,6 +3939,7 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::VPERMI:
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
+  case X86ISD::VZEXT_MOVL:
     return true;
   }
 }
@@ -4886,6 +4887,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
     DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
     IsUnary = true;
     break;
+  case X86ISD::VZEXT_MOVL:
+    DecodeZeroMoveLowMask(VT, Mask);
+    IsUnary = true;
+    break;
   case X86ISD::PSHUFB: {
     IsUnary = true;
     SDValue MaskNode = N->getOperand(1);

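From a combiner's point of view, the new case means a VZEXT_MOVL node now answers getTargetShuffleMask queries instead of being opaque. A hedged sketch of the decoded result (the trailing Mask/IsUnary parameters are assumed from the hunk above, which only shows the first three):

    SmallVector<int, 4> Mask;
    bool IsUnary;
    // For a v4f32 X86ISD::VZEXT_MOVL node N this now succeeds with
    //   Mask    == {0, SM_SentinelZero, SM_SentinelZero, SM_SentinelZero}
    //   IsUnary == true   (only operand 0 feeds the result)
    if (getTargetShuffleMask(N, MVT::v4f32, /*AllowSentinelZero=*/true,
                             Mask, IsUnary)) {
      // ...fold the zeroed lanes into a neighbouring shuffle's mask...
    }

Since the zero lanes are explicit sentinels rather than a separate zero-vector operand, a following shuffle combine can absorb them directly, which is what the test deltas below show.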

@@ -24,16 +24,12 @@ define <4 x float> @shuffle_v4f32_0z27(<4 x float> %x, <4 x float> %a) {
 define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %xyzw, <4 x float> %abcd) {
 ; SSE-LABEL: shuffle_v4f32_0zz4:
 ; SSE:       # BB#0:
-; SSE-NEXT:    xorps %xmm2, %xmm2
-; SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4f32_0zz4:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; AVX-NEXT:    retq
   %vecext = extractelement <4 x float> %xyzw, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0

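The reason a single insertps can absorb the xorps+blendps pair is its immediate, which carries a zero mask in addition to the source and destination lane selectors. A small sketch of the encoding (field layout per the Intel SDM; the helper name is ours, not LLVM's):

    // insertps imm8 layout: [7:6] source element of the second operand,
    // [5:4] destination lane, [3:0] result lanes forced to zero.
    constexpr unsigned insertpsImm(unsigned Src, unsigned Dst, unsigned ZMask) {
      return (Src << 6) | (Dst << 4) | (ZMask & 0xF);
    }
    // "xmm0 = xmm0[0],zero,zero,xmm1[0]": insert a[0] into lane 3,
    // zeroing lanes 1 and 2 on the way.
    static_assert(insertpsImm(0, 3, 0x6) == 0x36, "imm for the folded pattern");

The second test file below exercises the same three-instructions-to-one fold.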

@@ -507,16 +507,12 @@ define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
 define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
 ; X32-LABEL: shuf_X00A:
 ; X32:       ## BB#0:
-; X32-NEXT:    xorps %xmm2, %xmm2
-; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: shuf_X00A:
 ; X64:       ## BB#0:
-; X64-NEXT:    xorps %xmm2, %xmm2
-; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
-; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
 ; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0