[X86] Combine vpmovdw+vpacksswb into vpmovdb.

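This is similar to the combine we already have for vpmovdw+vpackuswb. The signed (PACKSS) case is folded only when the truncated input is known to have more than 8 sign bits per 16-bit element, so the saturating pack cannot change any value; the unsigned (PACKUS) case still requires the upper 8 bits of each 16-bit element to be known zero.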

llvm-svn: 348910
This commit is contained in:
Craig Topper 2018-12-12 05:56:01 +00:00
parent 5b69b5e20a
commit 1fe466689b
2 changed files with 10 additions and 12 deletions

View File

@ -35455,6 +35455,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
N1.getScalarValueSizeInBits() == SrcBitsPerElt && N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
"Unexpected PACKSS/PACKUS input type"); "Unexpected PACKSS/PACKUS input type");
bool IsSigned = (X86ISD::PACKSS == Opcode);
// Constant Folding. // Constant Folding.
APInt UndefElts0, UndefElts1; APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1; SmallVector<APInt, 32> EltBits0, EltBits1;
@ -35467,7 +35469,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
unsigned NumSrcElts = NumDstElts / 2; unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes; unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes; unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
bool IsSigned = (X86ISD::PACKSS == Opcode);
APInt Undefs(NumDstElts, 0); APInt Undefs(NumDstElts, 0);
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt)); SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
@ -35511,15 +35512,14 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
} }
// Try to combine a PACKUSWB implemented truncate with a regular truncate to // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// create a larger truncate. // truncate to create a larger truncate.
// TODO: Match PACKSSWB as well? if (Subtarget.hasAVX512() &&
if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS &&
N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
N0.getOperand(0).getValueType() == MVT::v8i32) { N0.getOperand(0).getValueType() == MVT::v8i32) {
if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
APInt ZeroMask = APInt::getHighBitsSet(16, 8); (!IsSigned &&
if (DAG.MaskedValueIsZero(N0, ZeroMask)) { DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
if (Subtarget.hasVLX()) if (Subtarget.hasVLX())
return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0)); return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));

View File

@ -648,16 +648,14 @@ define <8 x i8> @f64to8sc(<8 x double> %f) {
; NOVL-LABEL: f64to8sc: ; NOVL-LABEL: f64to8sc:
; NOVL: # %bb.0: ; NOVL: # %bb.0:
; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 ; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT: vpmovdw %zmm0, %ymm0 ; NOVL-NEXT: vpmovdb %zmm0, %xmm0
; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; NOVL-NEXT: vzeroupper ; NOVL-NEXT: vzeroupper
; NOVL-NEXT: retq ; NOVL-NEXT: retq
; ;
; VL-LABEL: f64to8sc: ; VL-LABEL: f64to8sc:
; VL: # %bb.0: ; VL: # %bb.0:
; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 ; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
; VL-NEXT: vpmovdw %ymm0, %xmm0 ; VL-NEXT: vpmovdb %ymm0, %xmm0
; VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VL-NEXT: vzeroupper ; VL-NEXT: vzeroupper
; VL-NEXT: retq ; VL-NEXT: retq
%res = fptosi <8 x double> %f to <8 x i8> %res = fptosi <8 x double> %f to <8 x i8>