forked from OSchip/llvm-project
[X86][AVX512] Add support for 512-bit PSHUFB lowering
llvm-svn: 274444
This commit is contained in:
parent
77dda7c2e0
commit
cde7c54baa
|
@@ -7256,11 +7256,12 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
|
|||
const int NumEltBytes = VT.getScalarSizeInBits() / 8;
|
||||
|
||||
assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
|
||||
-(Subtarget.hasAVX2() && VT.is256BitVector()));
|
||||
+(Subtarget.hasAVX2() && VT.is256BitVector()) ||
|
||||
+(Subtarget.hasBWI() && VT.is512BitVector()));
|
||||
|
||||
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
|
||||
|
||||
-SmallVector<SDValue, 32> PSHUFBMask(NumBytes);
|
||||
+SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
|
||||
// Sign bit set in i8 mask means zero element.
|
||||
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
|
||||
|
||||
|
@@ -11909,6 +11910,10 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
|
||||
return Rotate;
|
||||
|
||||
+if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1,
|
||||
+V2, Subtarget, DAG))
|
||||
+return PSHUFB;
|
||||
|
||||
// FIXME: Implement direct support for this type!
|
||||
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
|
||||
}
|
||||
|
|
|
@@ -2744,11 +2744,7 @@ define <32 x i16> @test_bitreverse_v32i16(<32 x i16> %a) nounwind {
|
|||
;
|
||||
; AVX512BW-LABEL: test_bitreverse_v32i16:
|
||||
; AVX512BW: # BB#0:
|
||||
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
|
||||
-; AVX512BW-NEXT: vpshufb %ymm2, %ymm1, %ymm1
|
||||
-; AVX512BW-NEXT: vpshufb %ymm2, %ymm0, %ymm0
|
||||
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30,33,32,35,34,37,36,39,38,41,40,43,42,45,44,47,46,49,48,51,50,53,52,55,54,57,56,59,58,61,60,63,62]
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
|
||||
|
@@ -3175,11 +3171,7 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
|
|||
;
|
||||
; AVX512BW-LABEL: test_bitreverse_v16i32:
|
||||
; AVX512BW: # BB#0:
|
||||
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
|
||||
-; AVX512BW-NEXT: vpshufb %ymm2, %ymm1, %ymm1
|
||||
-; AVX512BW-NEXT: vpshufb %ymm2, %ymm0, %ymm0
|
||||
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28,35,34,33,32,39,38,37,36,43,42,41,40,47,46,45,44,51,50,49,48,55,54,53,52,59,58,57,56,63,62,61,60]
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
|
||||
|
@@ -3710,11 +3702,7 @@ define <8 x i64> @test_bitreverse_v8i64(<8 x i64> %a) nounwind {
|
|||
;
|
||||
; AVX512BW-LABEL: test_bitreverse_v8i64:
|
||||
; AVX512BW: # BB#0:
|
||||
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
|
||||
-; AVX512BW-NEXT: vpshufb %ymm2, %ymm1, %ymm1
|
||||
-; AVX512BW-NEXT: vpshufb %ymm2, %ymm0, %ymm0
|
||||
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
+; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24,39,38,37,36,35,34,33,32,47,46,45,44,43,42,41,40,55,54,53,52,51,50,49,48,63,62,61,60,59,58,57,56]
|
||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
|
||||
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240,0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
|
||||
|
|
Loading…
Reference in New Issue