forked from OSchip/llvm-project
[X86] Combine vpmovdw+vpacksswb into vpmovdb.
This is similar to the combine we already have for vpmovdw+vpackuswb.

llvm-svn: 348910
This commit is contained in:
parent 5b69b5e20a
commit 1fe466689b
|
@ -35455,6 +35455,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
|
||||||
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
|
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
|
||||||
"Unexpected PACKSS/PACKUS input type");
|
"Unexpected PACKSS/PACKUS input type");
|
||||||
|
|
||||||
|
bool IsSigned = (X86ISD::PACKSS == Opcode);
|
||||||
|
|
||||||
// Constant Folding.
|
// Constant Folding.
|
||||||
APInt UndefElts0, UndefElts1;
|
APInt UndefElts0, UndefElts1;
|
||||||
SmallVector<APInt, 32> EltBits0, EltBits1;
|
SmallVector<APInt, 32> EltBits0, EltBits1;
|
||||||
|
@ -35467,7 +35469,6 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
|
||||||
unsigned NumSrcElts = NumDstElts / 2;
|
unsigned NumSrcElts = NumDstElts / 2;
|
||||||
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
|
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
|
||||||
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
|
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
|
||||||
bool IsSigned = (X86ISD::PACKSS == Opcode);
|
|
||||||
|
|
||||||
APInt Undefs(NumDstElts, 0);
|
APInt Undefs(NumDstElts, 0);
|
||||||
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
|
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
|
||||||
|
@ -35511,15 +35512,14 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
|
||||||
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
|
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to combine a PACKUSWB implemented truncate with a regular truncate to
|
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
|
||||||
// create a larger truncate.
|
// truncate to create a larger truncate.
|
||||||
// TODO: Match PACKSSWB as well?
|
if (Subtarget.hasAVX512() &&
|
||||||
if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS &&
|
|
||||||
N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
|
N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
|
||||||
N0.getOperand(0).getValueType() == MVT::v8i32) {
|
N0.getOperand(0).getValueType() == MVT::v8i32) {
|
||||||
|
if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
|
||||||
APInt ZeroMask = APInt::getHighBitsSet(16, 8);
|
(!IsSigned &&
|
||||||
if (DAG.MaskedValueIsZero(N0, ZeroMask)) {
|
DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
|
||||||
if (Subtarget.hasVLX())
|
if (Subtarget.hasVLX())
|
||||||
return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
|
return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
|
||||||
|
|
||||||
|
|
|
@ -648,16 +648,14 @@ define <8 x i8> @f64to8sc(<8 x double> %f) {
|
||||||
; NOVL-LABEL: f64to8sc:
|
; NOVL-LABEL: f64to8sc:
|
||||||
; NOVL: # %bb.0:
|
; NOVL: # %bb.0:
|
||||||
; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||||
; NOVL-NEXT: vpmovdw %zmm0, %ymm0
|
; NOVL-NEXT: vpmovdb %zmm0, %xmm0
|
||||||
; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; NOVL-NEXT: vzeroupper
|
; NOVL-NEXT: vzeroupper
|
||||||
; NOVL-NEXT: retq
|
; NOVL-NEXT: retq
|
||||||
;
|
;
|
||||||
; VL-LABEL: f64to8sc:
|
; VL-LABEL: f64to8sc:
|
||||||
; VL: # %bb.0:
|
; VL: # %bb.0:
|
||||||
; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
|
||||||
; VL-NEXT: vpmovdw %ymm0, %xmm0
|
; VL-NEXT: vpmovdb %ymm0, %xmm0
|
||||||
; VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; VL-NEXT: vzeroupper
|
; VL-NEXT: vzeroupper
|
||||||
; VL-NEXT: retq
|
; VL-NEXT: retq
|
||||||
%res = fptosi <8 x double> %f to <8 x i8>
|
%res = fptosi <8 x double> %f to <8 x i8>
|
||||||
|
|
Loading…
Reference in New Issue