forked from OSchip/llvm-project
[x86] Lower some trunc + shuffle patterns to vpmov[q|d][b|w]
This should help in lowering the following four intrinsics: _mm256_cvtepi32_epi8 _mm256_cvtepi64_epi16 _mm256_cvtepi64_epi8 _mm512_cvtepi64_epi8 Differential Revision: https://reviews.llvm.org/D46957 llvm-svn: 335238
This commit is contained in:
parent
760d1d5741
commit
22c82af5c8
|
@ -4822,10 +4822,24 @@ static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return true if Val falls within the specified range (L, H].
|
||||||
|
static bool isInRange(int Val, int Low, int Hi) {
|
||||||
|
return (Val >= Low && Val < Hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return true if the value of any element in Mask falls within the specified
|
||||||
|
/// range (L, H].
|
||||||
|
static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
|
||||||
|
for (int M : Mask)
|
||||||
|
if (isInRange(M, Low, Hi))
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// Return true if Val is undef or if its value falls within the
|
/// Return true if Val is undef or if its value falls within the
|
||||||
/// specified range (L, H].
|
/// specified range (L, H].
|
||||||
static bool isUndefOrInRange(int Val, int Low, int Hi) {
|
static bool isUndefOrInRange(int Val, int Low, int Hi) {
|
||||||
return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
|
return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return true if every element in Mask is undef or if its value
|
/// Return true if every element in Mask is undef or if its value
|
||||||
|
@ -4841,7 +4855,7 @@ static bool isUndefOrInRange(ArrayRef<int> Mask,
|
||||||
/// Return true if Val is undef, zero or if its value falls within the
|
/// Return true if Val is undef, zero or if its value falls within the
|
||||||
/// specified range (L, H].
|
/// specified range (L, H].
|
||||||
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
|
static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
|
||||||
return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
|
return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return true if every element in Mask is undef, zero or if its value
|
/// Return true if every element in Mask is undef, zero or if its value
|
||||||
|
@ -4854,11 +4868,11 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return true if every element in Mask, beginning
|
/// Return true if every element in Mask, beginning
|
||||||
/// from position Pos and ending in Pos+Size, falls within the specified
|
/// from position Pos and ending in Pos + Size, falls within the specified
|
||||||
/// sequential range (Low, Low+Size]. or is undef.
|
/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
|
||||||
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
|
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
|
||||||
unsigned Pos, unsigned Size, int Low) {
|
unsigned Size, int Low, int Step = 1) {
|
||||||
for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
|
for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
|
||||||
if (!isUndefOrEqual(Mask[i], Low))
|
if (!isUndefOrEqual(Mask[i], Low))
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
|
@ -9390,6 +9404,99 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool matchVectorShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
|
||||||
|
int Delta) {
|
||||||
|
int Size = (int)Mask.size();
|
||||||
|
int Split = Size / Delta;
|
||||||
|
int TruncatedVectorStart = SwappedOps ? Size : 0;
|
||||||
|
|
||||||
|
// Match for mask starting with e.g.: <8, 10, 12, 14,... or <0, 2, 4, 6,...
|
||||||
|
if (!isSequentialOrUndefInRange(Mask, 0, Split, TruncatedVectorStart, Delta))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// The rest of the mask should not refer to the truncated vector's elements.
|
||||||
|
if (isAnyInRange(Mask.slice(Split, Size - Split), TruncatedVectorStart,
|
||||||
|
TruncatedVectorStart + Size))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to lower trunc+vector_shuffle to a vpmovdb or a vpmovdw instruction.
|
||||||
|
//
|
||||||
|
// An example is the following:
|
||||||
|
//
|
||||||
|
// t0: ch = EntryToken
|
||||||
|
// t2: v4i64,ch = CopyFromReg t0, Register:v4i64 %0
|
||||||
|
// t25: v4i32 = truncate t2
|
||||||
|
// t41: v8i16 = bitcast t25
|
||||||
|
// t21: v8i16 = BUILD_VECTOR undef:i16, undef:i16, undef:i16, undef:i16,
|
||||||
|
// Constant:i16<0>, Constant:i16<0>, Constant:i16<0>, Constant:i16<0>
|
||||||
|
// t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21
|
||||||
|
// t18: v2i64 = bitcast t51
|
||||||
|
//
|
||||||
|
// Without avx512vl, this is lowered to:
|
||||||
|
//
|
||||||
|
// vpmovqd %zmm0, %ymm0
|
||||||
|
// vpshufb {{.*#+}} xmm0 =
|
||||||
|
// xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
||||||
|
//
|
||||||
|
// But when avx512vl is available, one can just use a single vpmovdw
|
||||||
|
// instruction.
|
||||||
|
static SDValue lowerVectorShuffleWithVPMOV(const SDLoc &DL, ArrayRef<int> Mask,
|
||||||
|
MVT VT, SDValue V1, SDValue V2,
|
||||||
|
SelectionDAG &DAG,
|
||||||
|
const X86Subtarget &Subtarget) {
|
||||||
|
if (VT != MVT::v16i8 && VT != MVT::v8i16)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
if (Mask.size() != VT.getVectorNumElements())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
bool SwappedOps = false;
|
||||||
|
|
||||||
|
if (!ISD::isBuildVectorAllZeros(V2.getNode())) {
|
||||||
|
if (!ISD::isBuildVectorAllZeros(V1.getNode()))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
std::swap(V1, V2);
|
||||||
|
SwappedOps = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look for:
|
||||||
|
//
|
||||||
|
// bitcast (truncate <8 x i32> %vec to <8 x i16>) to <16 x i8>
|
||||||
|
// bitcast (truncate <4 x i64> %vec to <4 x i32>) to <8 x i16>
|
||||||
|
//
|
||||||
|
// and similar ones.
|
||||||
|
if (V1.getOpcode() != ISD::BITCAST)
|
||||||
|
return SDValue();
|
||||||
|
if (V1.getOperand(0).getOpcode() != ISD::TRUNCATE)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
SDValue Src = V1.getOperand(0).getOperand(0);
|
||||||
|
MVT SrcVT = Src.getSimpleValueType();
|
||||||
|
|
||||||
|
// The vptrunc** instructions truncating 128 bit and 256 bit vectors
|
||||||
|
// are only available with avx512vl.
|
||||||
|
if (!SrcVT.is512BitVector() && !Subtarget.hasVLX())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// Down Convert Word to Byte is only available with avx512bw. The case with
|
||||||
|
// 256-bit output doesn't contain a shuffle and is therefore not handled here.
|
||||||
|
if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
|
||||||
|
!Subtarget.hasBWI())
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
// The first half/quarter of the mask should refer to every second/fourth
|
||||||
|
// element of the vector truncated and bitcasted.
|
||||||
|
if (!matchVectorShuffleAsVPMOV(Mask, SwappedOps, 2) &&
|
||||||
|
!matchVectorShuffleAsVPMOV(Mask, SwappedOps, 4))
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
|
return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
|
||||||
|
}
|
||||||
|
|
||||||
// X86 has dedicated pack instructions that can handle specific truncation
|
// X86 has dedicated pack instructions that can handle specific truncation
|
||||||
// operations: PACKSS and PACKUS.
|
// operations: PACKSS and PACKUS.
|
||||||
static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
|
static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1,
|
||||||
|
@ -14923,6 +15030,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
|
||||||
if (canonicalizeShuffleMaskWithCommute(Mask))
|
if (canonicalizeShuffleMaskWithCommute(Mask))
|
||||||
return DAG.getCommutedVectorShuffle(*SVOp);
|
return DAG.getCommutedVectorShuffle(*SVOp);
|
||||||
|
|
||||||
|
if (SDValue V =
|
||||||
|
lowerVectorShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget))
|
||||||
|
return V;
|
||||||
|
|
||||||
// For each vector width, delegate to a specialized lowering routine.
|
// For each vector width, delegate to a specialized lowering routine.
|
||||||
if (VT.is128BitVector())
|
if (VT.is128BitVector())
|
||||||
return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
|
return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
|
||||||
|
|
|
@ -511,8 +511,7 @@ define <2 x i64> @trunc_v8i32_to_v8i8_return_v2i64(<8 x i32> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
|
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -526,15 +525,13 @@ define <2 x i64> @trunc_v8i32_to_v8i8_return_v2i64(<8 x i32> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
|
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
|
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_return_v2i64:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated.vec = trunc <8 x i32> %vec to <8 x i8>
|
%truncated.vec = trunc <8 x i32> %vec to <8 x i8>
|
||||||
|
@ -573,8 +570,7 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
|
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -588,15 +584,13 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
|
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
|
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <8 x i32> %vec to <8 x i8>
|
%truncated = trunc <8 x i32> %vec to <8 x i8>
|
||||||
|
@ -636,8 +630,7 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
|
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -651,15 +644,13 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
|
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
|
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <8 x i32> %vec to <8 x i16>
|
%truncated = trunc <8 x i32> %vec to <8 x i16>
|
||||||
|
@ -698,8 +689,7 @@ define <16 x i8> @trunc_v8i32_to_v8i8_return_v16i8(<8 x i32> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
|
; AVX512VL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -713,15 +703,13 @@ define <16 x i8> @trunc_v8i32_to_v8i8_return_v16i8(<8 x i32> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
|
; AVX512BWVL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
|
; AVX512VBMIVL-LABEL: trunc_v8i32_to_v8i8_return_v16i8:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovdw %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <8 x i32> %vec to <8 x i8>
|
%truncated = trunc <8 x i32> %vec to <8 x i8>
|
||||||
|
@ -766,8 +754,7 @@ define <2 x i64> @trunc_v4i64_to_v4i16_return_v2i64(<4 x i64> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
|
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -781,15 +768,13 @@ define <2 x i64> @trunc_v4i64_to_v4i16_return_v2i64(<4 x i64> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
|
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
|
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <4 x i64> %vec to <4 x i16>
|
%truncated = trunc <4 x i64> %vec to <4 x i16>
|
||||||
|
@ -833,8 +818,7 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
|
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -848,15 +832,13 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
|
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
|
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <4 x i64> %vec to <4 x i16>
|
%truncated = trunc <4 x i64> %vec to <4 x i16>
|
||||||
|
@ -901,8 +883,7 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
|
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -916,15 +897,13 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
|
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
|
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <4 x i64> %vec to <4 x i32>
|
%truncated = trunc <4 x i64> %vec to <4 x i32>
|
||||||
|
@ -968,8 +947,7 @@ define <8 x i16> @trunc_v4i64_to_v4i16_return_v8i16(<4 x i64> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
|
; AVX512VL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -983,15 +961,13 @@ define <8 x i16> @trunc_v4i64_to_v4i16_return_v8i16(<4 x i64> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
|
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
|
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <4 x i64> %vec to <4 x i16>
|
%truncated = trunc <4 x i64> %vec to <4 x i16>
|
||||||
|
@ -1034,8 +1010,7 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
|
; AVX512VL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
|
||||||
; AVX512VL: # %bb.0:
|
; AVX512VL: # %bb.0:
|
||||||
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VL-NEXT: vpmovqb %ymm0, %xmm0
|
||||||
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[u],zero
|
|
||||||
; AVX512VL-NEXT: vzeroupper
|
; AVX512VL-NEXT: vzeroupper
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
|
@ -1049,15 +1024,13 @@ define <16 x i8> @trunc_v4i64_to_v4i8_return_v16i8(<4 x i64> %vec) nounwind {
|
||||||
;
|
;
|
||||||
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
|
; AVX512BWVL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
|
||||||
; AVX512BWVL: # %bb.0:
|
; AVX512BWVL: # %bb.0:
|
||||||
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512BWVL-NEXT: vpmovqb %ymm0, %xmm0
|
||||||
; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[u],zero
|
|
||||||
; AVX512BWVL-NEXT: vzeroupper
|
; AVX512BWVL-NEXT: vzeroupper
|
||||||
; AVX512BWVL-NEXT: retq
|
; AVX512BWVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
|
; AVX512VBMIVL-LABEL: trunc_v4i64_to_v4i8_return_v16i8:
|
||||||
; AVX512VBMIVL: # %bb.0:
|
; AVX512VBMIVL: # %bb.0:
|
||||||
; AVX512VBMIVL-NEXT: vpmovqd %ymm0, %xmm0
|
; AVX512VBMIVL-NEXT: vpmovqb %ymm0, %xmm0
|
||||||
; AVX512VBMIVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[u],zero
|
|
||||||
; AVX512VBMIVL-NEXT: vzeroupper
|
; AVX512VBMIVL-NEXT: vzeroupper
|
||||||
; AVX512VBMIVL-NEXT: retq
|
; AVX512VBMIVL-NEXT: retq
|
||||||
%truncated = trunc <4 x i64> %vec to <4 x i8>
|
%truncated = trunc <4 x i64> %vec to <4 x i8>
|
||||||
|
|
|
@ -945,8 +945,7 @@ define <4 x double> @PR34175(<32 x i16>* %p) {
|
||||||
define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
|
define <16 x i8> @trunc_v8i64_to_v8i8_return_v16i8(<8 x i64> %vec) nounwind {
|
||||||
; AVX512-LABEL: trunc_v8i64_to_v8i8_return_v16i8:
|
; AVX512-LABEL: trunc_v8i64_to_v8i8_return_v16i8:
|
||||||
; AVX512: # %bb.0:
|
; AVX512: # %bb.0:
|
||||||
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
|
; AVX512-NEXT: vpmovqb %zmm0, %xmm0
|
||||||
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
|
|
||||||
; AVX512-NEXT: vzeroupper
|
; AVX512-NEXT: vzeroupper
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%truncated = trunc <8 x i64> %vec to <8 x i8>
|
%truncated = trunc <8 x i64> %vec to <8 x i8>
|
||||||
|
|
Loading…
Reference in New Issue