forked from OSchip/llvm-project
[X86][SSE] Add VZEXT_MOVL target shuffle decoding.
Add support for decoding VZEXT_MOVL target shuffle masks, allowing VZEXT_MOVL nodes to be used as a source in target shuffle combines.
llvm-svn: 258215
This commit is contained in:
parent
fe4a7b3ccd
commit
4b919b2ab3
|
@ -3939,6 +3939,7 @@ static bool isTargetShuffle(unsigned Opcode) {
|
|||
case X86ISD::VPERMI:
|
||||
case X86ISD::VPERMV:
|
||||
case X86ISD::VPERMV3:
|
||||
case X86ISD::VZEXT_MOVL:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -4886,6 +4887,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
|
||||
IsUnary = true;
|
||||
break;
|
||||
case X86ISD::VZEXT_MOVL:
|
||||
DecodeZeroMoveLowMask(VT, Mask);
|
||||
IsUnary = true;
|
||||
break;
|
||||
case X86ISD::PSHUFB: {
|
||||
IsUnary = true;
|
||||
SDValue MaskNode = N->getOperand(1);
|
||||
|
|
|
@ -24,16 +24,12 @@ define <4 x float> @shuffle_v4f32_0z27(<4 x float> %x, <4 x float> %a) {
|
|||
define <4 x float> @shuffle_v4f32_0zz4(<4 x float> %xyzw, <4 x float> %abcd) {
|
||||
; SSE-LABEL: shuffle_v4f32_0zz4:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
|
||||
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: shuffle_v4f32_0zz4:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
|
||||
; AVX-NEXT: retq
|
||||
%vecext = extractelement <4 x float> %xyzw, i32 0
|
||||
%vecinit = insertelement <4 x float> undef, float %vecext, i32 0
|
||||
|
|
|
@ -507,16 +507,12 @@ define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
|
|||
define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
|
||||
; X32-LABEL: shuf_X00A:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: xorps %xmm2, %xmm2
|
||||
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
|
||||
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: shuf_X00A:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: xorps %xmm2, %xmm2
|
||||
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
|
||||
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0]
|
||||
; X64-NEXT: retq
|
||||
%vecext = extractelement <4 x float> %x, i32 0
|
||||
%vecinit = insertelement <4 x float> undef, float %vecext, i32 0
|
||||
|
|
Loading…
Reference in New Issue