[X86] Fix pshuflw formation from repeated shuffle mask (PR43230)

Fix for https://bugs.llvm.org/show_bug.cgi?id=43230.

When creating PSHUFLW from a repeated shuffle mask, we have to apply
the checks to the repeated mask, not the original one. For the test
case from PR43230 the inspected part of the original mask is all
undef, so the checks passed trivially and an incorrect PSHUFLW was
formed.
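
To make the distinction concrete, here is a minimal standalone sketch
(plain C++, not the actual LLVM helpers; the sample mask is
hypothetical, shaped like the PR43230 situation rather than copied
from the test):

  #include <cstdio>
  #include <vector>

  // Simplified analogue of is128BitLaneRepeatedShuffleMask for v16i16:
  // each 128-bit lane holds 8 x i16, so a 16-element mask folds into an
  // 8-element repeated mask. -1 means undef, as in LLVM shuffle masks.
  static bool getRepeatedMask(const std::vector<int> &Mask,
                              std::vector<int> &Repeated) {
    const int LaneSize = 8;
    Repeated.assign(LaneSize, -1);
    for (int I = 0, E = (int)Mask.size(); I != E; ++I) {
      int M = Mask[I];
      if (M < 0)
        continue; // undef matches anything
      if (M / LaneSize != I / LaneSize)
        return false; // element crosses a 128-bit lane
      int &R = Repeated[I % LaneSize];
      if (R >= 0 && R != M % LaneSize)
        return false; // the two lanes disagree
      R = M % LaneSize;
    }
    return true;
  }

  // The PSHUFLW precondition on the low 4 elements: undef or in [0,4).
  static bool lowHalfOk(const int *M) {
    for (int I = 0; I != 4; ++I)
      if (M[I] >= 4)
        return false; // defined element outside the low half
    return true;
  }

  int main() {
    // Low 4 elements of the *original* mask are all undef; the second
    // 128-bit lane fills the repeated mask's low half with elements
    // 4-7, i.e. with high-half sources.
    std::vector<int> Mask = {-1, -1, -1, -1, 4,  5,  6,  7,
                             12, 13, 14, 15, 12, 13, 14, 15};
    std::vector<int> Repeated;
    if (getRepeatedMask(Mask, Repeated)) {
      // The pre-fix check looked at Mask and printed 1 (forms PSHUFLW);
      // the fixed check looks at Repeated and prints 0 (rejects it).
      std::printf("original: %d, repeated: %d\n",
                  lowHalfOk(Mask.data()), lowHalfOk(Repeated.data()));
    }
    return 0;
  }

The all-undef low quarter of the original mask passes the range check
even though the repeated mask's low half needs elements 4-7, which
PSHUFLW (a permute of the low 4 elements only) cannot produce.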

Differential Revision: https://reviews.llvm.org/D67314

llvm-svn: 371307
Author: Nikita Popov
Date:   2019-09-07 12:13:44 +00:00
Parent: fdc6977ff3
Commit: 314893cc4b

2 changed files with 10 additions and 19 deletions

@@ -32041,8 +32041,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
   if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
     SmallVector<int, 4> RepeatedMask;
     if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
-      ArrayRef<int> LoMask(Mask.data() + 0, 4);
-      ArrayRef<int> HiMask(Mask.data() + 4, 4);
+      ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
+      ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
       // PSHUFLW: permute lower 4 elements only.
       if (isUndefOrInRange(LoMask, 0, 4) &&
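
For reference, the immediate a matched PSHUFLW ends up with packs the
four low-half indices two bits apiece; a hedged sketch of that
encoding (packV4Imm is a hypothetical stand-in, not the LLVM helper,
and mapping undef lanes to their own position is just one legal
choice):

  // Pack a 4-element mask (values 0-3, -1 = undef) into a PSHUFLW-style
  // imm8: element I occupies bits [2*I+1 : 2*I] of the immediate.
  static unsigned packV4Imm(const int M[4]) {
    unsigned Imm = 0;
    for (int I = 0; I != 4; ++I)
      Imm |= (unsigned)(M[I] < 0 ? I : M[I]) << (2 * I);
    return Imm;
  }

The imm8 is built from exactly those four indices, which is why LoMask
has to describe the lane-repeated pattern rather than the first four
elements of the wide mask.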

@@ -4777,23 +4777,14 @@ define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
 ; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
-; AVX2-SLOW-LABEL: pr43230:
-; AVX2-SLOW:       # %bb.0:
-; AVX2-SLOW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-SLOW-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-SLOW-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-SLOW-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
-; AVX2-SLOW-NEXT:    retq
-;
-; AVX2-FAST-LABEL: pr43230:
-; AVX2-FAST:       # %bb.0:
-; AVX2-FAST-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-FAST-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; AVX2-FAST-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; AVX2-FAST-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; AVX2-FAST-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-FAST-NEXT:    retq
+; AVX2-LABEL: pr43230:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
+; AVX2-NEXT:    retq
 ;
 ; AVX512VL-LABEL: pr43230:
 ; AVX512VL:       # %bb.0: