[X86][SSE] Add support for combining target shuffles to SHUFPD.

llvm-svn: 289407
Simon Pilgrim 2016-12-11 21:26:25 +00:00
parent 0a1476c756
commit 831435cb14
4 changed files with 52 additions and 27 deletions


@@ -11867,35 +11867,52 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
                                     SubLaneMask);
 }
 
-static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2, SelectionDAG &DAG) {
-  int NumElts = VT.getVectorNumElements();
-  assert(VT.getScalarType() == MVT::f64 &&
-         (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
-         "Unexpected data type for VSHUFPD");
-  // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
-  // Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
+static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
+                                         unsigned &ShuffleImm,
+                                         ArrayRef<int> Mask) {
+  assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD");
+
+  int NumElts = VT.getVectorNumElements();
+  ShuffleImm = 0;
   bool ShufpdMask = true;
   bool CommutableMask = true;
-  unsigned Immediate = 0;
   for (int i = 0; i < NumElts; ++i) {
-    if (Mask[i] < 0)
+    if (Mask[i] == SM_SentinelUndef)
       continue;
+    if (Mask[i] < 0)
+      return false;
     int Val = (i & 6) + NumElts * (i & 1);
-    int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1);
+    int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
     if (Mask[i] < Val || Mask[i] > Val + 1)
       ShufpdMask = false;
     if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
       CommutableMask = false;
-    Immediate |= (Mask[i] % 2) << i;
+    ShuffleImm |= (Mask[i] % 2) << i;
   }
 
   if (ShufpdMask)
-    return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
-                       DAG.getConstant(Immediate, DL, MVT::i8));
-  if (CommutableMask)
-    return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
-                       DAG.getConstant(Immediate, DL, MVT::i8));
-  return SDValue();
+    return true;
+  if (CommutableMask) {
+    std::swap(V1, V2);
+    return true;
+  }
+
+  return false;
 }
 
+static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
+                                            ArrayRef<int> Mask, SDValue V1,
+                                            SDValue V2, SelectionDAG &DAG) {
+  unsigned Immediate = 0;
+  if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+    return SDValue();
+
+  return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
+                     DAG.getConstant(Immediate, DL, MVT::i8));
+}
+
 static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
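
For readers outside the LLVM tree, the mask test above can be read in isolation. Below is a minimal standalone sketch of the same matching logic; it is illustrative only (plain ints instead of SDValues, -1 standing in for SM_SentinelUndef, and a hypothetical matchShufpdImm helper name), not the in-tree implementation.

// Standalone sketch (not LLVM code): same pattern test as
// matchVectorShuffleWithSHUFPD above, applied to a plain integer shuffle mask.
// Assumptions: NumElts is 2, 4 or 8; -1 marks an undef element; mask values
// in [0, NumElts) refer to src0 and [NumElts, 2*NumElts) refer to src1.
#include <cstdio>
#include <vector>

static bool matchShufpdImm(int NumElts, const std::vector<int> &Mask,
                           unsigned &Imm, bool &SwapOps) {
  bool ShufpdMask = true;     // every element fits the direct SHUFPD pattern
  bool CommutableMask = true; // every element fits the operand-swapped pattern
  Imm = 0;
  for (int i = 0; i < NumElts; ++i) {
    if (Mask[i] == -1) // undef element matches anything
      continue;
    if (Mask[i] < 0)   // any other sentinel: give up
      return false;
    // Result element i must come from the 64-bit pair {Val, Val+1}:
    // even elements from src0 and odd elements from src1 (or the other way
    // round for the commuted pattern).
    int Val = (i & 6) + NumElts * (i & 1);
    int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
    if (Mask[i] < Val || Mask[i] > Val + 1)
      ShufpdMask = false;
    if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
      CommutableMask = false;
    Imm |= (Mask[i] % 2) << i; // bit i: low (0) or high (1) half of the pair
  }
  SwapOps = !ShufpdMask && CommutableMask;
  return ShufpdMask || CommutableMask;
}

int main() {
  // v2f64 mask {1, 2}: high element of src0, then low element of src1 -- the
  // same shuffle as the combine_vpermil2pd_as_shufpd test further down.
  std::vector<int> Mask = {1, 2};
  unsigned Imm = 0;
  bool Swap = false;
  if (matchShufpdImm(2, Mask, Imm, Swap))
    std::printf("shufpd imm = 0x%x, swap = %d\n", Imm, Swap); // 0x1, swap = 0
  return 0;
}

Running the sketch on that mask prints an immediate of 0x1 with no operand swap, which corresponds to the xmm0[1],xmm1[0] pattern in the vpermil2pd test below.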
@@ -26199,6 +26216,17 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
+  // Attempt to combine to SHUFPD.
+  if ((MaskVT == MVT::v2f64 && Subtarget.hasSSE2()) ||
+      (MaskVT == MVT::v4f64 && Subtarget.hasAVX()) ||
+      (MaskVT == MVT::v8f64 && Subtarget.hasAVX512())) {
+    if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+      Shuffle = X86ISD::SHUFP;
+      ShuffleVT = MaskVT;
+      return true;
+    }
+  }
+
   return false;
 }
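
A quick worked example of the commuted path this hook can now take (the mask here is hypothetical, not taken from this patch's tests): a v2f64 mask {2, 1} asks for V2[0] followed by V1[1]. The direct pattern fails because element 0 would have to come from V1, but the commuted pattern matches, so matchVectorShuffleWithSHUFPD swaps V1 and V2 and reports ShuffleImm = 0b10; the caller then emits SHUFP with the swapped operands and that immediate, giving exactly {V2[0], V1[1]}.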


@@ -687,9 +687,8 @@ define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
 ; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
 ; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
+; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
 ; AVX1-NEXT: retq
 ;
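
This AVX1 change is the combine in action: the old vpermilpd (ymm2 = ymm2[0,0,3,2]) followed by vblendpd picked one element of each 64-bit pair from ymm2 and one from ymm0, so the pair of instructions folds into a single vshufpd on the unpermuted ymm2 and ymm0. Going by the VSHUFPD encoding (bit i of the immediate selects the low or high element of the i-th pair), the printed pattern ymm2[0],ymm0[1],ymm2[3],ymm0[3] corresponds to an immediate of 0b1110.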


@@ -858,14 +858,12 @@ define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x dou
 define <8 x double> @combine_vpermi2var_8f64_as_shufpd(<8 x double> %x0, <8 x double> %x1) {
 ; X32-LABEL: combine_vpermi2var_8f64_as_shufpd:
 ; X32: # BB#0:
-; X32-NEXT: vmovapd {{.*#+}} zmm2 = [1,0,8,0,2,0,10,0,5,0,13,0,6,0,15,0]
-; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; X32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[2],zmm1[2],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_vpermi2var_8f64_as_shufpd:
 ; X64: # BB#0:
-; X64-NEXT: vmovapd {{.*#+}} zmm2 = [1,8,2,10,5,13,6,15]
-; X64-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; X64-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[2],zmm1[2],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
 ; X64-NEXT: retq
   %1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> <i64 1, i64 8, i64 2, i64 10, i64 5, i64 13, i64 6, i64 15>, <8 x double> %x1, i8 -1)
   ret <8 x double> %1
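
The vpermi2var index mask <1,8,2,10,5,13,6,15> fits the SHUFPD pattern directly: each even result element takes an element from %x0's own 128-bit pair (indices 1, 2, 5, 6) and each odd result element takes one from the matching pair of %x1 (8, 10, 13, 15 are %x1 elements 0, 2, 5, 7). The per-element low/high selections give an immediate of 0b10110001 (0xb1), which is what the printed zmm0[1],zmm1[0],...,zmm1[7] operand pattern encodes, so the variable permute and its index vector load are replaced by a single immediate-controlled vshufpd.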


@@ -150,12 +150,12 @@ define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x flo
 define <2 x double> @combine_vpermil2pd_as_shufpd(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: combine_vpermil2pd_as_shufpd:
 ; X32: # BB#0:
-; X32-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; X32-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_vpermil2pd_as_shufpd:
 ; X64: # BB#0:
-; X64-NEXT: vpermil2pd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; X64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
 ; X64-NEXT: retq
   %res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 2, i64 4>, i8 0)
   ret <2 x double> %res0
@@ -164,12 +164,12 @@ define <2 x double> @combine_vpermil2pd_as_shufpd(<2 x double> %a0, <2 x double>
 define <4 x double> @combine_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x double> %a1) {
 ; X32-LABEL: combine_vpermil2pd256_as_shufpd:
 ; X32: # BB#0:
-; X32-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; X32-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: combine_vpermil2pd256_as_shufpd:
 ; X64: # BB#0:
-; X64-NEXT: vpermil2pd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
 ; X64-NEXT: retq
   %res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i8 0)
   ret <4 x double> %res0