[X86][AVX512] Add support for 512-bit shuffle lowering to VPERMPD/VPERMQ
llvm-svn: 274473
parent d1eca0f32c
commit 7f096de0b8
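For context, a repeated-lane example adapted from the updated tests below (the before/after
assembly is quoted from the test checks in this patch): a 512-bit shuffle whose 8-element mask
repeats the same 4-element pattern in both 256-bit lanes can now be lowered to a single
VPERMPD/VPERMQ with an 8-bit immediate, instead of a constant-pool permute mask plus a
variable permute.

    ; A v8f64 shuffle that repeats the pattern <0,0,0,1> in each 256-bit lane.
    define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
      %shuffle = shufflevector <8 x double> %a, <8 x double> %b,
                               <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
      ret <8 x double> %shuffle
    }
    ; Before: vmovdqa64 zmm1 = [0,0,0,1,4,4,4,5]
    ;         vpermpd %zmm0, %zmm1, %zmm0
    ; After:  vpermpd zmm0 = zmm0[0,0,0,1,4,4,4,5]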
@@ -7040,10 +7040,10 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
   return false;
 }
 
-/// \brief Test whether a shuffle mask is equivalent within each 128-bit lane.
+/// \brief Test whether a shuffle mask is equivalent within each sub-lane.
 ///
 /// This checks a shuffle mask to see if it is performing the same
-/// 128-bit lane-relative shuffle in each 128-bit lane. This trivially implies
+/// lane-relative shuffle in each sub-lane. This trivially implies
 /// that it is also not lane-crossing. It may however involve a blend from the
 /// same lane of a second vector.
 ///
@@ -7051,10 +7051,10 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
 /// non-trivial to compute in the face of undef lanes. The representation is
 /// suitable for use with existing 128-bit shuffles as entries from the second
 /// vector have been remapped to [LaneSize, 2*LaneSize).
-static bool
-is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
-                                SmallVectorImpl<int> &RepeatedMask) {
-  int LaneSize = 128 / VT.getScalarSizeInBits();
+static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
+                                  ArrayRef<int> Mask,
+                                  SmallVectorImpl<int> &RepeatedMask) {
+  int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
   RepeatedMask.assign(LaneSize, -1);
   int Size = Mask.size();
   for (int i = 0; i < Size; ++i) {
@@ -7078,6 +7078,20 @@ is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
   return true;
 }
 
+/// Test whether a shuffle mask is equivalent within each 128-bit lane.
+static bool
+is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
+                                SmallVectorImpl<int> &RepeatedMask) {
+  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
+}
+
+/// Test whether a shuffle mask is equivalent within each 256-bit lane.
+static bool
+is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
+                                SmallVectorImpl<int> &RepeatedMask) {
+  return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
+}
+
 /// \brief Checks whether a shuffle mask is equivalent to an explicit list of
 /// arguments.
 ///
@@ -11732,6 +11746,11 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
       return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
                          DAG.getConstant(VPERMILPMask, DL, MVT::i8));
     }
+
+    SmallVector<int, 4> RepeatedMask;
+    if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
+      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
+                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
   }
 
   if (SDValue Shuf128 =
@@ -11791,16 +11810,17 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
           lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
     return Shuf128;
 
-  // When the shuffle is mirrored between the 128-bit lanes of the unit, we can
-  // use lower latency instructions that will operate on both 128-bit lanes.
-  SmallVector<int, 2> RepeatedMask;
-  if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, RepeatedMask)) {
-    if (V2.isUndef()) {
+  if (V2.isUndef()) {
+    // When the shuffle is mirrored between the 128-bit lanes of the unit, we
+    // can use lower latency instructions that will operate on all four
+    // 128-bit lanes.
+    SmallVector<int, 2> Repeated128Mask;
+    if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) {
       int PSHUFDMask[] = {-1, -1, -1, -1};
       for (int i = 0; i < 2; ++i)
-        if (RepeatedMask[i] >= 0) {
-          PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
-          PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
+        if (Repeated128Mask[i] >= 0) {
+          PSHUFDMask[2 * i] = 2 * Repeated128Mask[i];
+          PSHUFDMask[2 * i + 1] = 2 * Repeated128Mask[i] + 1;
         }
       return DAG.getBitcast(
           MVT::v8i64,
@@ -11808,6 +11828,11 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
           DAG.getBitcast(MVT::v16i32, V1),
           getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
     }
+
+    SmallVector<int, 4> Repeated256Mask;
+    if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask))
+      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1,
+                         getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
   }
 
   // Try to use shift instructions.
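The AVX512 shuffle tests below are updated accordingly. As an aside, here is a simplified
standalone sketch (not part of the patch) of how a repeated 4-element mask is packed into the
imm8 operand that VPERMPD/VPERMQ consume; the actual lowering calls getV4X86ShuffleImm8ForMask,
whose undef handling may differ from this illustration:

    // Hypothetical helper, for illustration only: two bits per destination
    // element select the source element within the same 256-bit lane.
    #include <cassert>
    #include <cstdint>

    static uint8_t repeatedMaskToImm8(const int (&RepeatedMask)[4]) {
      uint8_t Imm = 0;
      for (int i = 0; i < 4; ++i) {
        // Undef entries (< 0) fall back to an identity pick of element i.
        int M = RepeatedMask[i] < 0 ? i : RepeatedMask[i];
        assert(M < 4 && "entry must stay within a single 256-bit lane");
        Imm |= static_cast<uint8_t>(M) << (2 * i);
      }
      return Imm;
    }

    // Example: the repeated half of shuffle_v8f64_00014445 is {0, 0, 0, 1},
    // which packs to 0b01000000 (0x40).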
@@ -1,9 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
 
-target triple = "x86_64-unknown-unknown"
-
 define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
 ; AVX512F-LABEL: shuffle_v8f64_00000000:
 ; AVX512F: # BB#0:
@@ -172,12 +170,12 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
 define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
 ; AVX512F-LABEL: shuffle_v8f64_01014545:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_01014545:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   ret <8 x double> %shuffle
@@ -435,14 +433,12 @@ define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_00014445:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_00014445:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   ret <8 x double> %shuffle
@@ -452,14 +448,12 @@ define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_00204464:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_00204464:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   ret <8 x double> %shuffle
@@ -469,14 +463,12 @@ define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_03004744:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_03004744:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -486,14 +478,12 @@ define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_10005444:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_10005444:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -503,14 +493,12 @@ define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_22006644:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_22006644:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   ret <8 x double> %shuffle
@@ -520,14 +508,12 @@ define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_33307774:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_33307774:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   ret <8 x double> %shuffle
@@ -537,14 +523,12 @@ define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8f64_32107654:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
-; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_32107654:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
-; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   ret <8 x double> %shuffle
@@ -1425,14 +1409,12 @@ define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_00014445:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_00014445:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,0,1,4,4,4,5]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   ret <8 x i64> %shuffle
@@ -1442,14 +1424,12 @@ define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_00204464:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_00204464:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,0,4,4,6,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   ret <8 x i64> %shuffle
@@ -1459,14 +1439,12 @@ define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_03004744:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_03004744:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,3,0,0,4,7,4,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -1476,14 +1454,12 @@ define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_10005444:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_10005444:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -1493,14 +1469,12 @@ define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_22006644:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_22006644:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,2,0,0,6,6,4,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   ret <8 x i64> %shuffle
@@ -1510,14 +1484,12 @@ define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_33307774:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_33307774:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,3,3,0,7,7,7,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   ret <8 x i64> %shuffle
@@ -1527,14 +1499,12 @@ define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_32107654:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_32107654:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   ret <8 x i64> %shuffle
@@ -1544,14 +1514,12 @@ define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_00234467:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_00234467:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[0,0,2,3,4,4,6,7]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -1606,14 +1574,12 @@ define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_10235467:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_10235467:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,3,5,4,6,7]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   ret <8 x i64> %shuffle
@@ -1623,14 +1589,12 @@ define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_10225466:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_10225466:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: vpermq {{.*#+}} zmm0 = zmm0[1,0,2,2,5,4,6,6]
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   ret <8 x i64> %shuffle
@@ -2269,12 +2233,12 @@ define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1)
 define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
 ; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1],zmm0[0,1,0,1]
+; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
 ; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
 ; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm1[2,3,0,1],zmm0[0,1,0,1]
+; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
 ; AVX512F-32-NEXT: retl
   %1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x double> %1