forked from OSchip/llvm-project
[X86] Fix pshuflw formation from repeated shuffle mask (PR43230)
Fix for https://bugs.llvm.org/show_bug.cgi?id=43230.

When creating PSHUFLW from a repeated shuffle mask, we have to apply the checks to the repeated mask, not the original one. For the test case from PR43230, the inspected part of the original mask is all undef, so the checks passed trivially even though the repeated mask does not describe a valid PSHUFLW.

Differential Revision: https://reviews.llvm.org/D67314

llvm-svn: 371307
This commit is contained in:
parent
fdc6977ff3
commit
314893cc4b
|
@ -32041,8 +32041,8 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
|
||||||
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
|
if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) {
|
||||||
SmallVector<int, 4> RepeatedMask;
|
SmallVector<int, 4> RepeatedMask;
|
||||||
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
|
if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
|
||||||
ArrayRef<int> LoMask(Mask.data() + 0, 4);
|
ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
|
||||||
ArrayRef<int> HiMask(Mask.data() + 4, 4);
|
ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
|
||||||
|
|
||||||
// PSHUFLW: permute lower 4 elements only.
|
// PSHUFLW: permute lower 4 elements only.
|
||||||
if (isUndefOrInRange(LoMask, 0, 4) &&
|
if (isUndefOrInRange(LoMask, 0, 4) &&
|
||||||
|
|
|
@ -4777,23 +4777,14 @@ define <16 x i16> @pr43230(<16 x i16> %a, <16 x i16> %b) {
|
||||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-SLOW-LABEL: pr43230:
|
; AVX2-LABEL: pr43230:
|
||||||
; AVX2-SLOW: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
||||||
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; AVX2-SLOW-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
|
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[26,27],zero,zero
|
||||||
; AVX2-SLOW-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
;
|
|
||||||
; AVX2-FAST-LABEL: pr43230:
|
|
||||||
; AVX2-FAST: # %bb.0:
|
|
||||||
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
|
|
||||||
; AVX2-FAST-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
|
||||||
; AVX2-FAST-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
|
||||||
; AVX2-FAST-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX2-FAST-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
|
||||||
; AVX2-FAST-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: pr43230:
|
; AVX512VL-LABEL: pr43230:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
|
|
Loading…
Reference in New Issue