forked from OSchip/llvm-project
[X86][SSE] Accept any shuffle mask that is all zeroes
Until we have a better way to extract constants through bitcasted build vectors (and to handle undefs of partial lanes, etc.), at least accept build vectors that are all zeroes. llvm-svn: 274833
This commit is contained in:
parent
627ca96e4d
commit
828c731880
|
@ -4788,6 +4788,13 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
|
|||
if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
|
||||
return false;
|
||||
|
||||
// We can always decode if the buildvector is all zero constants,
|
||||
// but can't use isBuildVectorAllZeros as it might contain UNDEFs.
|
||||
if (llvm::all_of(MaskNode->ops(), X86::isZeroNode)) {
|
||||
RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
|
||||
if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
|
||||
return false;
|
||||
|
|
|
@ -139,8 +139,7 @@ define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
|
|||
define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
|
||||
; ALL-LABEL: combine_vpermilvar_2f64_movddup:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
|
||||
; ALL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
||||
; ALL-NEXT: retq
|
||||
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
|
||||
ret <2 x double> %1
|
||||
|
|
|
@ -393,8 +393,7 @@ define <16 x i32> @combine_permvar_as_vpbroadcastd512(<16 x i32> %x0) {
|
|||
define <8 x i64> @combine_permvar_as_vpbroadcastq512(<8 x i64> %x0) {
|
||||
; CHECK-LABEL: combine_permvar_as_vpbroadcastq512:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
|
||||
ret <8 x i64> %1
|
||||
|
|
Loading…
Reference in New Issue